From 8fe2181c57c6ecfbe88d6141d4480b1f6c5fe066 Mon Sep 17 00:00:00 2001
From: Gabriel <liwenqiang@selectdb.com>
Date: Tue, 30 Jun 2026 22:04:13 +0800
Subject: [PATCH] [feature](be) Add file scanner v2 readers

### What problem does this PR solve?

Issue Number: close #xxx

Related PR: #63893

Problem Summary: Add the file scanner v2 reader stack for external file scans, including native readers for Parquet, CSV/TEXT, JSON, JNI-backed table readers, schema projection, column mapping, predicate handling, reader statistics, page cache support, and related BE/FE integration. This also restores affected Parquet LZO regression cases by adding Doris thirdparty Arrow LZO page decompression support for file scanner v2.

The change keeps VDirectInPredicate source-compatible with existing ordinary two-argument construction by defaulting the new HybridSet child-type flag to true. Dictionary-code rewrites can still pass false explicitly, while existing runtime filter tests continue to compile with the old call shape.

Review follow-up fixes make RuntimeFilterExpr global-index slot rewriting update the executable _impl tree, document enable_file_scanner_v2 as default-on to match the FE default, and trim generated regression outputs so diff hygiene passes.

### Release note

Support file scanner v2 readers for external file scan paths, including LZO-compressed Parquet reads in the new Parquet reader path.

### Check List (For Author)

- Test: Manual test
    - Verified apache-arrow-17.0.0-lzo.patch applies with patch -p1 --dry-run against Arrow 17 column_reader.cc
    - Ran bash -n thirdparty/build-thirdparty.sh thirdparty/download-thirdparty.sh
    - Ran build-support/clang-format.sh
    - Ran git diff --check
    - Attempted ./run-be-ut.sh --run --filter='RuntimeFilterExprSamplingTest.deep_clone_clones_impl_tree'; local sandboxed run could not complete because the BE UT script required JDK 17 setup first, then needed submodule metadata writes and GitHub access for thirdparty dependencies. Escalated retry was not approved before timeout.
    - Attempted ./run-be-ut.sh --run --filter='FileScannerV2Test.RewriteSlotRefsToGlobalIndexMatrix'; local sandboxed run could not complete because the BE UT script needed submodule metadata writes and GitHub access for thirdparty dependencies. Escalated retry was not approved before timeout.
    - Full BE unit tests and external regression tests were not run in this local environment
- Behavior changed: Yes. Adds file scanner v2 reader behavior and enables LZO-compressed Parquet reads through the new reader path
- Does this need documentation: No
---
 .gitignore                                    |    1 +
 be/cmake/thirdparty.cmake                     |    1 +
 be/src/core/data_type/data_type_timestamptz.h |    4 +
 .../data_type_datetimev2_serde.cpp            |  143 +
 .../data_type_datetimev2_serde.h              |    2 +
 .../data_type_datev2_serde.cpp                |   24 +
 .../data_type_serde/data_type_datev2_serde.h  |    2 +
 .../data_type_decimal_serde.cpp               |  152 +
 .../data_type_serde/data_type_decimal_serde.h |    2 +
 .../data_type_nullable_serde.cpp              |   37 +-
 .../data_type_nullable_serde.h                |    2 +
 .../data_type_number_serde.cpp                |  170 +
 .../data_type_serde/data_type_number_serde.h  |    3 +
 .../core/data_type_serde/data_type_serde.cpp  |   48 +
 be/src/core/data_type_serde/data_type_serde.h |    9 +
 .../data_type_string_serde.cpp                |   42 +
 .../data_type_serde/data_type_string_serde.h  |    3 +
 .../data_type_serde/data_type_time_serde.cpp  |   48 +
 .../data_type_serde/data_type_time_serde.h    |    2 +
 .../data_type_timestamptz_serde.cpp           |   85 +
 .../data_type_timestamptz_serde.h             |    4 +
 .../data_type_serde/decoded_column_view.h     |  105 +
 be/src/exec/operator/file_scan_operator.cpp   |   29 +-
 be/src/exec/operator/file_scan_operator.h     |    2 +
 be/src/exec/operator/result_sink_operator.h   |    2 +-
 be/src/exec/scan/access_path_parser.cpp       |  479 ++
 be/src/exec/scan/access_path_parser.h         |   41 +
 be/src/exec/scan/file_scanner.cpp             |   28 +-
 be/src/exec/scan/file_scanner.h               |    4 +-
 be/src/exec/scan/file_scanner_v2.cpp          |  835 ++++
 be/src/exec/scan/file_scanner_v2.h            |  162 +
 be/src/exec/scan/split_source_connector.h     |   29 +
 .../sink/writer/vhive_partition_writer.cpp    |    2 +
 be/src/exprs/runtime_filter_expr.cpp          |   13 +-
 be/src/exprs/runtime_filter_expr.h            |    6 +
 be/src/exprs/short_circuit_evaluation_expr.h  |   35 +-
 be/src/exprs/vbloom_predicate.h               |    7 +
 be/src/exprs/vcase_expr.h                     |   11 +
 be/src/exprs/vcast_expr.h                     |   12 +
 be/src/exprs/vcolumn_ref.h                    |   13 +
 be/src/exprs/vcompound_pred.h                 |    5 +
 be/src/exprs/vcondition_expr.h                |   15 +
 be/src/exprs/vdirect_in_predicate.h           |    9 +-
 be/src/exprs/vectorized_fn_call.cpp           |    4 +-
 be/src/exprs/vectorized_fn_call.h             |    6 +
 be/src/exprs/vexpr.cpp                        |   59 +-
 be/src/exprs/vexpr.h                          |   19 +-
 be/src/exprs/vin_predicate.h                  |    9 +
 be/src/exprs/vliteral.cpp                     |    6 -
 be/src/exprs/vliteral.h                       |   23 +-
 be/src/exprs/vslot_ref.cpp                    |   41 +-
 be/src/exprs/vslot_ref.h                      |   15 +-
 be/src/exprs/vtopn_pred.h                     |    5 +
 be/src/format/CMakeLists.txt                  |    3 +
 be/src/format/csv/csv_reader.cpp              |    4 +-
 be/src/format/generic_reader.h                |   12 +-
 be/src/format/json/new_json_reader.cpp        |    4 +-
 be/src/format/native/native_reader.cpp        |    4 +-
 be/src/format/orc/vorc_reader.cpp             |    4 +-
 be/src/format/parquet/vparquet_reader.cpp     |    4 +-
 .../format/table/deletion_vector_reader.cpp   |   19 +-
 be/src/format/table/deletion_vector_reader.h  |   35 +-
 be/src/format/table/iceberg_reader_mixin.h    |    3 -
 be/src/format_v2/column_data.h                |  410 ++
 be/src/format_v2/column_mapper.cpp            | 2029 ++++++++
 be/src/format_v2/column_mapper.h              |  294 ++
 be/src/format_v2/column_mapper_nested.cpp     | 1050 +++++
 be/src/format_v2/column_mapper_nested.h       |  105 +
 .../format_v2/delimited_text/csv_reader.cpp   |  295 ++
 be/src/format_v2/delimited_text/csv_reader.h  |   73 +
 .../delimited_text/delimited_text_reader.cpp  |  644 +++
 .../delimited_text/delimited_text_reader.h    |  176 +
 .../format_v2/delimited_text/text_reader.cpp  |  164 +
 be/src/format_v2/delimited_text/text_reader.h |   62 +
 be/src/format_v2/expr/cast.cpp                |  131 +
 be/src/format_v2/expr/cast.h                  |   68 +
 be/src/format_v2/expr/delete_predicate.cpp    |  122 +
 be/src/format_v2/expr/delete_predicate.h      |   60 +
 .../expr/equality_delete_predicate.cpp        |  159 +
 .../expr/equality_delete_predicate.h          |   71 +
 be/src/format_v2/file_reader.cpp              |  209 +
 be/src/format_v2/file_reader.h                |  400 ++
 be/src/format_v2/jni/hudi_jni_reader.cpp      |  167 +
 be/src/format_v2/jni/hudi_jni_reader.h        |   43 +
 .../jni/iceberg_sys_table_reader.cpp          |   76 +
 .../format_v2/jni/iceberg_sys_table_reader.h  |   40 +
 be/src/format_v2/jni/jdbc_reader.cpp          |  187 +
 be/src/format_v2/jni/jdbc_reader.h            |   56 +
 be/src/format_v2/jni/jni_table_reader.cpp     |  386 ++
 be/src/format_v2/jni/jni_table_reader.h       |  117 +
 .../format_v2/jni/max_compute_jni_reader.cpp  |  149 +
 be/src/format_v2/jni/max_compute_jni_reader.h |   51 +
 be/src/format_v2/jni/paimon_jni_reader.cpp    |   93 +
 be/src/format_v2/jni/paimon_jni_reader.h      |   40 +
 .../jni/trino_connector_jni_reader.cpp        |  141 +
 .../jni/trino_connector_jni_reader.h          |   44 +
 be/src/format_v2/json/json_reader.cpp         | 1123 +++++
 be/src/format_v2/json/json_reader.h           |  179 +
 be/src/format_v2/materialized_reader_util.cpp |   89 +
 be/src/format_v2/materialized_reader_util.h   |   63 +
 be/src/format_v2/native/native_reader.cpp     |  311 ++
 be/src/format_v2/native/native_reader.h       |   70 +
 .../parquet/parquet_column_schema.cpp         |  492 ++
 .../format_v2/parquet/parquet_column_schema.h |   80 +
 .../parquet/parquet_file_context.cpp          |  442 ++
 .../format_v2/parquet/parquet_file_context.h  |   99 +
 be/src/format_v2/parquet/parquet_profile.cpp  |  191 +
 be/src/format_v2/parquet/parquet_profile.h    |  140 +
 be/src/format_v2/parquet/parquet_reader.cpp   |  674 +++
 be/src/format_v2/parquet/parquet_reader.h     |   92 +
 be/src/format_v2/parquet/parquet_scan.cpp     |  648 +++
 be/src/format_v2/parquet/parquet_scan.h       |  182 +
 .../format_v2/parquet/parquet_statistics.cpp  | 1303 ++++++
 be/src/format_v2/parquet/parquet_statistics.h |  109 +
 be/src/format_v2/parquet/parquet_type.cpp     |  358 ++
 be/src/format_v2/parquet/parquet_type.h       |   82 +
 .../parquet/reader/column_reader.cpp          |  625 +++
 .../format_v2/parquet/reader/column_reader.h  |  200 +
 .../reader/global_rowid_column_reader.cpp     |   84 +
 .../reader/global_rowid_column_reader.h       |   47 +
 .../parquet/reader/list_column_reader.cpp     |  203 +
 .../parquet/reader/list_column_reader.h       |   52 +
 .../parquet/reader/map_column_reader.cpp      |  238 +
 .../parquet/reader/map_column_reader.h        |   56 +
 .../reader/nested_column_materializer.cpp     |   70 +
 .../reader/nested_column_materializer.h       |   45 +
 .../parquet/reader/parquet_leaf_reader.cpp    |  728 +++
 .../parquet/reader/parquet_leaf_reader.h      |  168 +
 .../reader/row_position_column_reader.cpp     |   76 +
 .../reader/row_position_column_reader.h       |   43 +
 .../parquet/reader/scalar_column_reader.cpp   |  315 ++
 .../parquet/reader/scalar_column_reader.h     |   92 +
 .../parquet/reader/struct_column_reader.cpp   |  258 +
 .../parquet/reader/struct_column_reader.h     |   61 +
 be/src/format_v2/parquet/selection_vector.h   |  163 +
 be/src/format_v2/schema_projection.cpp        |  147 +
 be/src/format_v2/schema_projection.h          |   57 +
 be/src/format_v2/table/hive_reader.cpp        |  150 +
 be/src/format_v2/table/hive_reader.h          |   41 +
 be/src/format_v2/table/hudi_reader.cpp        |  163 +
 be/src/format_v2/table/hudi_reader.h          |   78 +
 be/src/format_v2/table/iceberg_reader.cpp     |  797 ++++
 be/src/format_v2/table/iceberg_reader.h       |  175 +
 be/src/format_v2/table/paimon_reader.cpp      |  194 +
 be/src/format_v2/table/paimon_reader.h        |   84 +
 .../format_v2/table/remote_doris_reader.cpp   |  365 ++
 be/src/format_v2/table/remote_doris_reader.h  |  104 +
 .../format_v2/table/schema_history_util.cpp   |  150 +
 be/src/format_v2/table/schema_history_util.h  |   43 +
 be/src/format_v2/table_reader.cpp             |  847 ++++
 be/src/format_v2/table_reader.h               | 1565 +++++++
 be/src/io/file_factory.cpp                    |   13 +-
 be/src/io/file_factory.h                      |    5 +-
 be/src/io/io_common.h                         |    4 +
 be/src/storage/segment/condition_cache.h      |   18 +-
 be/src/util/jni-util.h                        |    8 +
 be/test/CMakeLists.txt                        |    1 +
 .../data_type_serde_decoded_values_test.cpp   | 1852 ++++++++
 .../data_type_serde_pb_test.cpp               |   14 +-
 .../runtime_filter_expr_sampling_test.cpp     |   46 +
 be/test/exec/scan/access_path_parser_test.cpp |  371 ++
 be/test/exec/scan/file_scanner_v2_test.cpp    |  347 ++
 .../scan/vfile_scanner_exception_test.cpp     |  115 +-
 be/test/format_v2/column_mapper_test.cpp      | 4140 +++++++++++++++++
 .../delimited_text/csv_reader_test.cpp        | 1070 +++++
 .../delimited_text/text_reader_test.cpp       |  965 ++++
 be/test/format_v2/expr/cast_test.cpp          |  172 +
 .../format_v2/expr/delete_predicate_test.cpp  |  168 +
 .../expr/equality_delete_predicate_test.cpp   |  181 +
 be/test/format_v2/json/json_reader_test.cpp   |  608 +++
 .../format_v2/native/native_reader_test.cpp   |  419 ++
 .../parquet/parquet_column_reader_test.cpp    | 3620 ++++++++++++++
 .../parquet/parquet_leaf_reader_test.cpp      |  506 ++
 .../parquet/parquet_page_cache_range_test.cpp |  117 +
 .../parquet/parquet_reader_control_test.cpp   | 1034 ++++
 .../format_v2/parquet/parquet_reader_test.cpp | 2274 +++++++++
 .../format_v2/parquet/parquet_scan_test.cpp   |  804 ++++
 .../format_v2/parquet/parquet_schema_test.cpp |  527 +++
 .../parquet/parquet_serde_reader_test.cpp     |  459 ++
 .../parquet/parquet_statistics_test.cpp       |  460 ++
 .../format_v2/parquet/parquet_type_test.cpp   |  494 ++
 be/test/format_v2/table/hive_reader_test.cpp  |  151 +
 be/test/format_v2/table/hudi_reader_test.cpp  |  182 +
 .../format_v2/table/iceberg_reader_test.cpp   | 1852 ++++++++
 .../format_v2/table/paimon_reader_test.cpp    |  539 +++
 .../table/remote_doris_reader_test.cpp        |  470 ++
 .../format_v2/table_reader_request_test.cpp   |   96 +
 be/test/format_v2/table_reader_test.cpp       | 3826 +++++++++++++++
 docs/doris-iceberg-parquet-api-design.md      |  511 ++
 ...ew-parquet-reader-column-index-refactor.md |  404 ++
 .../new-parquet-reader-ut-improvement-plan.md |  325 ++
 docs/parquet-list-map-compat-design.md        |  664 +++
 .../apache/doris/paimon/PaimonJniScanner.java |    4 +
 .../datasource/hive/HMSExternalTable.java     |    3 +
 .../paimon/source/PaimonScanNode.java         |    4 +
 .../org/apache/doris/qe/SessionVariable.java  |   17 +-
 .../ParquetFileFormatPropertiesTest.java      |    2 +
 gensrc/thrift/Exprs.thrift                    |    4 +
 gensrc/thrift/Opcodes.thrift                  |    2 +
 gensrc/thrift/PaloInternalService.thrift      |    1 +
 gensrc/thrift/PlanNodes.thrift                |    8 +
 .../export_p0/export/test_show_export.out     |  298 +-
 .../test_outfile_parquet_complex_type.out     |   12 +
 .../data/export_p0/test_export_parquet.out    |  198 +-
 .../parquet/test_hive_read_parquet.out        |   24 +-
 ...> test_hive_read_parquet_complex_type.out} |    0
 .../hive/ddl/test_hive_ctas.out               |  200 -
 .../hive/test_complex_types.out               |   48 -
 .../hive/test_external_catalog_hive.out       |  124 -
 .../test_external_catalog_hive_partition.out  |  120 -
 .../hive/test_hive_compress_type.out          |  595 ++-
 .../hive/test_hive_get_schema_from_table.out  |  651 ---
 .../hive/test_hive_openx_json.out             |    1 +
 .../hive/test_hive_schema_evolution.out       |   36 -
 .../hive/write/test_hive_write_insert.out     |  232 -
 .../test_iceberg_export_timestamp_tz.out      |   48 +-
 .../test_paimon_catalog_timestamp_tz.out      |    8 +-
 .../tvf/test_hdfs_parquet_group0.out          |  Bin 23955 -> 26419 bytes
 .../tvf/test_hdfs_parquet_group2.out          |  203 +-
 .../tvf/test_hdfs_parquet_group3.out          |  Bin 11387 -> 11347 bytes
 .../tvf/test_hdfs_parquet_group4.out          |  Bin 106854 -> 106812 bytes
 .../tvf/test_hdfs_parquet_group5.out          |  Bin 613319 -> 613345 bytes
 .../tvf/test_hdfs_parquet_group6.out          |   30 +-
 .../test_outfile_parquet_complex_type.groovy  |   11 +
 .../parquet/test_hive_read_parquet.groovy     |    3 +-
 ...est_hive_read_parquet_complex_type.groovy} |    3 +-
 .../hive/test_hive_compress_type.groovy       |   26 +-
 .../hive/test_hive_date_timezone.groovy       |    1 -
 .../hive/test_parquet_lazy_mat_profile.groovy |    2 +
 .../test_iceberg_optimize_count.groovy        |    6 +-
 .../test_remote_doris_agg_table_select.groovy |    4 +-
 ...st_remote_doris_unique_table_select.groovy |    4 +-
 .../test_remote_doris_variant_select.groovy   |    2 +-
 .../tvf/test_hdfs_parquet_group0.groovy       |    8 +-
 .../tvf/test_hdfs_parquet_group2.groovy       |    5 +-
 .../tvf/test_hdfs_parquet_group4.groovy       |    4 +-
 .../tvf/test_hdfs_parquet_group5.groovy       |    4 +-
 .../tvf/test_hdfs_parquet_group6.groovy       |   14 +-
 thirdparty/build-thirdparty.sh                |    5 +-
 thirdparty/download-thirdparty.sh             |    3 +
 .../patches/apache-arrow-17.0.0-lzo.patch     |   84 +
 thirdparty/vars.sh                            |    8 +-
 242 files changed, 56859 insertions(+), 2273 deletions(-)
 create mode 100644 be/src/core/data_type_serde/decoded_column_view.h
 create mode 100644 be/src/exec/scan/access_path_parser.cpp
 create mode 100644 be/src/exec/scan/access_path_parser.h
 create mode 100644 be/src/exec/scan/file_scanner_v2.cpp
 create mode 100644 be/src/exec/scan/file_scanner_v2.h
 create mode 100644 be/src/format_v2/column_data.h
 create mode 100644 be/src/format_v2/column_mapper.cpp
 create mode 100644 be/src/format_v2/column_mapper.h
 create mode 100644 be/src/format_v2/column_mapper_nested.cpp
 create mode 100644 be/src/format_v2/column_mapper_nested.h
 create mode 100644 be/src/format_v2/delimited_text/csv_reader.cpp
 create mode 100644 be/src/format_v2/delimited_text/csv_reader.h
 create mode 100644 be/src/format_v2/delimited_text/delimited_text_reader.cpp
 create mode 100644 be/src/format_v2/delimited_text/delimited_text_reader.h
 create mode 100644 be/src/format_v2/delimited_text/text_reader.cpp
 create mode 100644 be/src/format_v2/delimited_text/text_reader.h
 create mode 100644 be/src/format_v2/expr/cast.cpp
 create mode 100644 be/src/format_v2/expr/cast.h
 create mode 100644 be/src/format_v2/expr/delete_predicate.cpp
 create mode 100644 be/src/format_v2/expr/delete_predicate.h
 create mode 100644 be/src/format_v2/expr/equality_delete_predicate.cpp
 create mode 100644 be/src/format_v2/expr/equality_delete_predicate.h
 create mode 100644 be/src/format_v2/file_reader.cpp
 create mode 100644 be/src/format_v2/file_reader.h
 create mode 100644 be/src/format_v2/jni/hudi_jni_reader.cpp
 create mode 100644 be/src/format_v2/jni/hudi_jni_reader.h
 create mode 100644 be/src/format_v2/jni/iceberg_sys_table_reader.cpp
 create mode 100644 be/src/format_v2/jni/iceberg_sys_table_reader.h
 create mode 100644 be/src/format_v2/jni/jdbc_reader.cpp
 create mode 100644 be/src/format_v2/jni/jdbc_reader.h
 create mode 100644 be/src/format_v2/jni/jni_table_reader.cpp
 create mode 100644 be/src/format_v2/jni/jni_table_reader.h
 create mode 100644 be/src/format_v2/jni/max_compute_jni_reader.cpp
 create mode 100644 be/src/format_v2/jni/max_compute_jni_reader.h
 create mode 100644 be/src/format_v2/jni/paimon_jni_reader.cpp
 create mode 100644 be/src/format_v2/jni/paimon_jni_reader.h
 create mode 100644 be/src/format_v2/jni/trino_connector_jni_reader.cpp
 create mode 100644 be/src/format_v2/jni/trino_connector_jni_reader.h
 create mode 100644 be/src/format_v2/json/json_reader.cpp
 create mode 100644 be/src/format_v2/json/json_reader.h
 create mode 100644 be/src/format_v2/materialized_reader_util.cpp
 create mode 100644 be/src/format_v2/materialized_reader_util.h
 create mode 100644 be/src/format_v2/native/native_reader.cpp
 create mode 100644 be/src/format_v2/native/native_reader.h
 create mode 100644 be/src/format_v2/parquet/parquet_column_schema.cpp
 create mode 100644 be/src/format_v2/parquet/parquet_column_schema.h
 create mode 100644 be/src/format_v2/parquet/parquet_file_context.cpp
 create mode 100644 be/src/format_v2/parquet/parquet_file_context.h
 create mode 100644 be/src/format_v2/parquet/parquet_profile.cpp
 create mode 100644 be/src/format_v2/parquet/parquet_profile.h
 create mode 100644 be/src/format_v2/parquet/parquet_reader.cpp
 create mode 100644 be/src/format_v2/parquet/parquet_reader.h
 create mode 100644 be/src/format_v2/parquet/parquet_scan.cpp
 create mode 100644 be/src/format_v2/parquet/parquet_scan.h
 create mode 100644 be/src/format_v2/parquet/parquet_statistics.cpp
 create mode 100644 be/src/format_v2/parquet/parquet_statistics.h
 create mode 100644 be/src/format_v2/parquet/parquet_type.cpp
 create mode 100644 be/src/format_v2/parquet/parquet_type.h
 create mode 100644 be/src/format_v2/parquet/reader/column_reader.cpp
 create mode 100644 be/src/format_v2/parquet/reader/column_reader.h
 create mode 100644 be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp
 create mode 100644 be/src/format_v2/parquet/reader/global_rowid_column_reader.h
 create mode 100644 be/src/format_v2/parquet/reader/list_column_reader.cpp
 create mode 100644 be/src/format_v2/parquet/reader/list_column_reader.h
 create mode 100644 be/src/format_v2/parquet/reader/map_column_reader.cpp
 create mode 100644 be/src/format_v2/parquet/reader/map_column_reader.h
 create mode 100644 be/src/format_v2/parquet/reader/nested_column_materializer.cpp
 create mode 100644 be/src/format_v2/parquet/reader/nested_column_materializer.h
 create mode 100644 be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp
 create mode 100644 be/src/format_v2/parquet/reader/parquet_leaf_reader.h
 create mode 100644 be/src/format_v2/parquet/reader/row_position_column_reader.cpp
 create mode 100644 be/src/format_v2/parquet/reader/row_position_column_reader.h
 create mode 100644 be/src/format_v2/parquet/reader/scalar_column_reader.cpp
 create mode 100644 be/src/format_v2/parquet/reader/scalar_column_reader.h
 create mode 100644 be/src/format_v2/parquet/reader/struct_column_reader.cpp
 create mode 100644 be/src/format_v2/parquet/reader/struct_column_reader.h
 create mode 100644 be/src/format_v2/parquet/selection_vector.h
 create mode 100644 be/src/format_v2/schema_projection.cpp
 create mode 100644 be/src/format_v2/schema_projection.h
 create mode 100644 be/src/format_v2/table/hive_reader.cpp
 create mode 100644 be/src/format_v2/table/hive_reader.h
 create mode 100644 be/src/format_v2/table/hudi_reader.cpp
 create mode 100644 be/src/format_v2/table/hudi_reader.h
 create mode 100644 be/src/format_v2/table/iceberg_reader.cpp
 create mode 100644 be/src/format_v2/table/iceberg_reader.h
 create mode 100644 be/src/format_v2/table/paimon_reader.cpp
 create mode 100644 be/src/format_v2/table/paimon_reader.h
 create mode 100644 be/src/format_v2/table/remote_doris_reader.cpp
 create mode 100644 be/src/format_v2/table/remote_doris_reader.h
 create mode 100644 be/src/format_v2/table/schema_history_util.cpp
 create mode 100644 be/src/format_v2/table/schema_history_util.h
 create mode 100644 be/src/format_v2/table_reader.cpp
 create mode 100644 be/src/format_v2/table_reader.h
 create mode 100644 be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp
 create mode 100644 be/test/exec/scan/access_path_parser_test.cpp
 create mode 100644 be/test/exec/scan/file_scanner_v2_test.cpp
 create mode 100644 be/test/format_v2/column_mapper_test.cpp
 create mode 100644 be/test/format_v2/delimited_text/csv_reader_test.cpp
 create mode 100644 be/test/format_v2/delimited_text/text_reader_test.cpp
 create mode 100644 be/test/format_v2/expr/cast_test.cpp
 create mode 100644 be/test/format_v2/expr/delete_predicate_test.cpp
 create mode 100644 be/test/format_v2/expr/equality_delete_predicate_test.cpp
 create mode 100644 be/test/format_v2/json/json_reader_test.cpp
 create mode 100644 be/test/format_v2/native/native_reader_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_column_reader_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_leaf_reader_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_page_cache_range_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_reader_control_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_reader_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_scan_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_schema_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_serde_reader_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_statistics_test.cpp
 create mode 100644 be/test/format_v2/parquet/parquet_type_test.cpp
 create mode 100644 be/test/format_v2/table/hive_reader_test.cpp
 create mode 100644 be/test/format_v2/table/hudi_reader_test.cpp
 create mode 100644 be/test/format_v2/table/iceberg_reader_test.cpp
 create mode 100644 be/test/format_v2/table/paimon_reader_test.cpp
 create mode 100644 be/test/format_v2/table/remote_doris_reader_test.cpp
 create mode 100644 be/test/format_v2/table_reader_request_test.cpp
 create mode 100644 be/test/format_v2/table_reader_test.cpp
 create mode 100644 docs/doris-iceberg-parquet-api-design.md
 create mode 100644 docs/new-parquet-reader-column-index-refactor.md
 create mode 100644 docs/new-parquet-reader-ut-improvement-plan.md
 create mode 100644 docs/parquet-list-map-compat-design.md
 rename regression-test/data/external_table_p0/export/hive_read/parquet/{test_hive_read_parquet_comlex_type.out => test_hive_read_parquet_complex_type.out} (100%)
 rename regression-test/suites/external_table_p0/export/hive_read/parquet/{test_hive_read_parquet_comlex_type.groovy => test_hive_read_parquet_complex_type.groovy} (99%)
 create mode 100644 thirdparty/patches/apache-arrow-17.0.0-lzo.patch

diff --git a/.gitignore b/.gitignore
index 7a61c598c99f75..d3976f46132e8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -153,3 +153,4 @@ compile_commands.json
 .github
 
 .worktrees/
+.worktree_initialized
diff --git a/be/cmake/thirdparty.cmake b/be/cmake/thirdparty.cmake
index 1dba907307deca..f8598d785dfed5 100644
--- a/be/cmake/thirdparty.cmake
+++ b/be/cmake/thirdparty.cmake
@@ -66,6 +66,7 @@ add_thirdparty(gmock)
 add_thirdparty(snappy)
 add_thirdparty(curl)
 add_thirdparty(lz4)
+add_thirdparty(lzo2)
 add_thirdparty(thrift)
 add_thirdparty(thriftnb)
 add_thirdparty(crc32c)
diff --git a/be/src/core/data_type/data_type_timestamptz.h b/be/src/core/data_type/data_type_timestamptz.h
index 4a3fba0616cc45..b386402cb49696 100644
--- a/be/src/core/data_type/data_type_timestamptz.h
+++ b/be/src/core/data_type/data_type_timestamptz.h
@@ -56,6 +56,10 @@ class DataTypeTimeStampTz final : public DataTypeNumberBase<PrimitiveType::TYPE_
         return "TimeStampTz(" + std::to_string(_scale) + ")";
     }
 
+    void to_protobuf(PTypeDesc* ptype, PTypeNode* node, PScalarType* scalar_type) const override {
+        scalar_type->set_scale(_scale); // Only the scale is recorded; ptype and node are unused.
+    }
+
     void to_pb_column_meta(PColumnMeta* col_meta) const override {
         DataTypeNumberBase<PrimitiveType::TYPE_TIMESTAMPTZ>::to_pb_column_meta(col_meta);
         col_meta->mutable_decimal_param()->set_scale(_scale);
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
index ca84996ea45306..0eb5e4d44a39a1 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.cpp
@@ -28,6 +28,7 @@
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/types.h"
 #include "core/value/vdatetime_value.h"
 #include "exprs/function/cast/cast_to_datetimev2_impl.hpp"
@@ -43,6 +44,95 @@ enum {
 namespace doris {
 static const int64_t micro_to_nano_second = 1000;
 
+namespace {
+// Packed 12-byte INT96 timestamp layout: nanoseconds of day, then the Julian day number.
+#pragma pack(1)
+struct DecodedInt96Timestamp {
+    int64_t nanos_of_day;
+    int32_t julian_day;
+    // Micros since Julian day 2440588; the int64_t offset keeps the subtraction in 64 bits.
+    int64_t to_timestamp_micros() const {
+        static constexpr int64_t JULIAN_EPOCH_OFFSET_DAYS = 2440588;
+        static constexpr int64_t MICROS_IN_DAY = 86400000000;
+        static constexpr int64_t NANOS_PER_MICROSECOND = 1000;
+        return (julian_day - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY +
+               nanos_of_day / NANOS_PER_MICROSECOND; // sub-microsecond digits are dropped
+    }
+};
+#pragma pack()
+static_assert(sizeof(DecodedInt96Timestamp) == 12);
+// Appends the DATETIMEV2 value for an epoch-microsecond timestamp; takes no time zone.
+Status append_datetimev2_from_epoch_micros(ColumnDateTimeV2::Container& data,
+                                           int64_t timestamp_micros) {
+    static constexpr int64_t MICROS_PER_SECOND = 1000000;
+    static constexpr int64_t MICROS_PER_MINUTE = MICROS_PER_SECOND * 60;
+    static constexpr int64_t MICROS_PER_HOUR = MICROS_PER_MINUTE * 60;
+    static constexpr int64_t MICROS_PER_DAY = MICROS_PER_HOUR * 24;
+    static const int64_t EPOCH_DAYNR = calc_daynr(1970, 1, 1);
+    // Floor-divide so a negative timestamp still yields a non-negative time-of-day part.
+    int64_t days_since_epoch = timestamp_micros / MICROS_PER_DAY;
+    int64_t micros_of_day = timestamp_micros % MICROS_PER_DAY;
+    if (micros_of_day < 0) {
+        micros_of_day += MICROS_PER_DAY;
+        --days_since_epoch;
+    }
+    // A day number at or below zero is rejected as a data quality error.
+    const int64_t daynr = EPOCH_DAYNR + days_since_epoch;
+    if (daynr <= 0) {
+        return Status::DataQualityError(
+                "Decoded DATETIMEV2 timestamp is out of range: micros={}, daynr={}",
+                timestamp_micros, daynr);
+    }
+    // A false return from get_date_from_daynr is rejected with the same error text.
+    DateV2Value<DateTimeV2ValueType> datetime_value;
+    if (!datetime_value.get_date_from_daynr(static_cast<uint64_t>(daynr))) {
+        return Status::DataQualityError(
+                "Decoded DATETIMEV2 timestamp is out of range: micros={}, daynr={}",
+                timestamp_micros, daynr);
+    }
+    // Split the time-of-day remainder into hour, minute, second and microsecond.
+    const auto hour = static_cast<uint8_t>(micros_of_day / MICROS_PER_HOUR);
+    micros_of_day %= MICROS_PER_HOUR;
+    const auto minute = static_cast<uint8_t>(micros_of_day / MICROS_PER_MINUTE);
+    micros_of_day %= MICROS_PER_MINUTE;
+    const auto second = static_cast<uint16_t>(micros_of_day / MICROS_PER_SECOND);
+    const auto microsecond = static_cast<uint32_t>(micros_of_day % MICROS_PER_SECOND);
+    datetime_value.unchecked_set_time(datetime_value.year(), datetime_value.month(),
+                                      datetime_value.day(), hour, minute, second, microsecond);
+    data.push_back(datetime_value);
+    return Status::OK();
+}
+// Appends a DATETIMEV2 built via from_unixtime(seconds, timezone) plus the microsecond part.
+void append_datetimev2_from_utc_epoch_micros(ColumnDateTimeV2::Container& data,
+                                             int64_t timestamp_micros,
+                                             const cctz::time_zone& timezone) {
+    static constexpr int64_t MICROS_PER_SECOND = 1000000;
+    // Floor-divide so a negative timestamp still yields microseconds in [0, 1000000).
+    int64_t epoch_seconds = timestamp_micros / MICROS_PER_SECOND;
+    int64_t micros_of_second = timestamp_micros % MICROS_PER_SECOND;
+    if (micros_of_second < 0) {
+        micros_of_second += MICROS_PER_SECOND;
+        --epoch_seconds;
+    }
+    // Whole seconds are converted with the given zone; microseconds are set afterwards.
+    DateV2Value<DateTimeV2ValueType> datetime_value;
+    datetime_value.from_unixtime(epoch_seconds, timezone);
+    datetime_value.set_microsecond(static_cast<uint32_t>(micros_of_second));
+    data.push_back(datetime_value);
+}
+// Scales value to microseconds by view.time_unit: MILLIS * 1000, NANOS / 1000, else as is.
+int64_t decoded_timestamp_micros(const DecodedColumnView& view, int64_t value) {
+    if (view.time_unit == DecodedTimeUnit::MILLIS) {
+        return value * 1000; // NOTE(review): the multiply is not checked for overflow
+    }
+    if (view.time_unit == DecodedTimeUnit::NANOS) {
+        return value / 1000; // NOTE(review): truncates toward zero for negatives; confirm
+    }
+    return value;
+}
+
+} // namespace
+
 // NOLINTBEGIN(readability-function-size)
 // NOLINTBEGIN(readability-function-cognitive-complexity)
 Status DataTypeDateTimeV2SerDe::from_string_batch(const ColumnString& col_str,
@@ -451,6 +541,59 @@ Status DataTypeDateTimeV2SerDe::read_column_from_arrow(IColumn& column,
     return Status::OK();
 }
 
+Status DataTypeDateTimeV2SerDe::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    // DATETIMEV2 is built from INT64 or INT96 timestamps; other kinds are conversion failures.
+    const bool is_int96 = view.value_kind == DecodedValueKind::INT96;
+    if (!is_int96 && view.value_kind != DecodedValueKind::INT64) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view,
+                Status::NotSupported("DATETIMEV2 decoded reader expects INT64 or INT96 source"));
+    }
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data = assert_cast<ColumnDateTimeV2&>(column).get_data();
+    const auto old_size = data.size();
+    // Fall back to UTC when the view does not carry a time zone.
+    static const auto utc_timezone = cctz::utc_time_zone();
+    const auto& timezone = view.timezone == nullptr ? utc_timezone : *view.timezone;
+    if (is_int96) {
+        const auto* int96_values = reinterpret_cast<const DecodedInt96Timestamp*>(view.values);
+        for (int64_t row = 0; row < view.row_count; ++row) {
+            if (!decoded_column_view_row_is_null(view, row)) {
+                append_datetimev2_from_utc_epoch_micros(
+                        data, int96_values[row].to_timestamp_micros(), timezone);
+            } else {
+                data.push_back(DateV2Value<DateTimeV2ValueType>());
+            }
+        }
+        return Status::OK();
+    }
+    const auto* int64_values = reinterpret_cast<const int64_t*>(view.values);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(DateV2Value<DateTimeV2ValueType>());
+            continue;
+        }
+        const int64_t micros = decoded_timestamp_micros(view, int64_values[row]);
+        if (view.timestamp_is_adjusted_to_utc) {
+            append_datetimev2_from_utc_epoch_micros(data, micros, timezone);
+            continue;
+        }
+        auto status = append_datetimev2_from_epoch_micros(data, micros); // no time zone applied
+        if (status.ok()) {
+            continue;
+        }
+        if (!decoded_column_view_can_null_on_conversion_failure(view)) {
+            data.resize(old_size);
+            return status;
+        }
+        decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+    }
+    return Status::OK();
+}
+
 Status DataTypeDateTimeV2SerDe::write_column_to_mysql_binary(const IColumn& column,
                                                              MysqlRowBinaryBuffer& result,
                                                              int64_t row_idx, bool col_const,
diff --git a/be/src/core/data_type_serde/data_type_datetimev2_serde.h b/be/src/core/data_type_serde/data_type_datetimev2_serde.h
index 0389432a621730..34d0373eba1c34 100644
--- a/be/src/core/data_type_serde/data_type_datetimev2_serde.h
+++ b/be/src/core/data_type_serde/data_type_datetimev2_serde.h
@@ -88,6 +88,8 @@ class DataTypeDateTimeV2SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_D
                                  const cctz::time_zone& ctz) const override;
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
 
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.cpp b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
index 1a503af38ef4ce..33e484ef946c01 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.cpp
@@ -27,6 +27,7 @@
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/types.h"
 #include "core/value/vdatetime_value.h"
 #include "exprs/function/cast/cast_to_datev2_impl.hpp"
@@ -124,6 +125,43 @@ Status DataTypeDateV2SerDe::read_column_from_arrow(IColumn& column, const arrow:
     return Status::OK();
 }
 
+// Reads INT32 day counts into DATEV2 values. Each source value is offset by `date_threshold`
+// and handed to DateV2Value::get_date_from_daynr(). A day number that call rejects is nulled
+// when the view allows it; otherwise the rows appended by this call are removed and
+// DataQualityError is returned.
+Status DataTypeDateV2SerDe::read_column_from_decoded_values(IColumn& column,
+                                                            const DecodedColumnView& view) const {
+    if (view.value_kind != DecodedValueKind::INT32) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view, Status::NotSupported("DATEV2 decoded reader expects INT32 source"));
+    }
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data = assert_cast<ColumnDateV2&>(column).get_data();
+    const auto old_size = data.size();
+    const auto* values = reinterpret_cast<const int32_t*>(view.values);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(DateV2Value<DateV2ValueType>());
+            continue;
+        }
+        // Add in 64 bits: 32-bit arithmetic could overflow for values near INT32_MAX.
+        const auto daynr = static_cast<int64_t>(values[row]) + date_threshold;
+        DateV2Value<DateV2ValueType> date_v2;
+        // Do not store the untouched default value as if it were real data when the day
+        // number is rejected.
+        if (!date_v2.get_date_from_daynr(daynr)) {
+            if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                continue;
+            }
+            data.resize(old_size);
+            return Status::DataQualityError(
+                    "Decoded DATEV2 value is out of range: days={}, daynr={}", values[row], daynr);
+        }
+        data.push_back(date_v2);
+    }
+    return Status::OK();
+}
+
 Status DataTypeDateV2SerDe::write_column_to_mysql_binary(const IColumn& column,
                                                          MysqlRowBinaryBuffer& result,
                                                          int64_t row_idx, bool col_const,
diff --git a/be/src/core/data_type_serde/data_type_datev2_serde.h b/be/src/core/data_type_serde/data_type_datev2_serde.h
index 0375f9be4b4b23..ff985d61345d5a 100644
--- a/be/src/core/data_type_serde/data_type_datev2_serde.h
+++ b/be/src/core/data_type_serde/data_type_datev2_serde.h
@@ -86,6 +86,8 @@ class DataTypeDateV2SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_DATEV
                                  const cctz::time_zone& ctz) const override;
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override;
diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.cpp b/be/src/core/data_type_serde/data_type_decimal_serde.cpp
index fa21498d9feaf2..c977b3e31251c6 100644
--- a/be/src/core/data_type_serde/data_type_decimal_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_decimal_serde.cpp
@@ -31,6 +31,7 @@
 #include "core/column/column_decimal.h"
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/types.h"
 #include "exec/common/arithmetic_overflow.h"
 #include "exprs/function/cast/cast_to_decimal.h"
@@ -43,6 +44,139 @@
 #include "util/string_parser.hpp"
 
 namespace doris {
+namespace {
+
+template <typename NativeType>
+NativeType decode_big_endian_signed_integer(const uint8_t* data, int length) {
+    // A set top bit in the first byte means negative: start from all ones to sign-extend.
+    const bool negative = data != nullptr && length > 0 && (data[0] & 0x80) != 0;
+    if constexpr (std::is_same_v<NativeType, wide::Int256>) {
+        NativeType acc = negative ? NativeType(-1) : NativeType(0);
+        for (int pos = 0; pos < length; ++pos) {
+            acc = (acc << 8) + NativeType(data[pos]);
+        }
+        return acc;
+    } else {
+        // Accumulate in the unsigned counterpart; convert back to the signed type at the end.
+        using UnsignedNativeType =
+                std::conditional_t<std::is_same_v<NativeType, Int128>, unsigned __int128,
+                                   std::make_unsigned_t<NativeType>>;
+        UnsignedNativeType acc = negative ? static_cast<UnsignedNativeType>(-1) : 0;
+        for (int pos = 0; pos < length; ++pos) {
+            acc = static_cast<UnsignedNativeType>((acc << 8) | data[pos]);
+        }
+        return static_cast<NativeType>(acc);
+    }
+}
+
+template <PrimitiveType T>
+bool decoded_decimal_value_fits(const typename PrimitiveTypeTraits<T>::CppType::NativeType& value,
+                                UInt32 precision) {
+    const bool below_min = value < min_decimal_value<T>(precision).value; // lower bound first
+    return !below_min && value <= max_decimal_value<T>(precision).value;
+}
+
+template <PrimitiveType T>
+bool decoded_decimal_int_value_fits(Int128 value, UInt32 precision) {
+    using NativeType = typename PrimitiveTypeTraits<T>::CppType::NativeType;
+    if constexpr (!std::is_same_v<NativeType, wide::Int256>) {
+        const bool not_below = value >= static_cast<Int128>(min_decimal_value<T>(precision).value);
+        return not_below && value <= static_cast<Int128>(max_decimal_value<T>(precision).value);
+    } else {
+        // 256-bit decimals: widen the value and compare in the Int256 domain instead.
+        return decoded_decimal_value_fits<T>(wide::Int256(value), precision);
+    }
+}
+
+// Decodes row `row` of `view` into a decimal of type T and stores it in `*result`.
+// Returns DataQualityError when a binary value is wider than the native type or when the
+// decoded value is outside the range allowed by `precision`.
+template <PrimitiveType T>
+Status read_decimal_decoded_value(const DecodedColumnView& view, UInt32 precision, int64_t row,
+                                  typename PrimitiveTypeTraits<T>::CppType* result) {
+    using FieldType = typename PrimitiveTypeTraits<T>::CppType;
+    using NativeType = typename FieldType::NativeType;
+    NativeType native_value;
+    const bool is_int32 = view.value_kind == DecodedValueKind::INT32;
+    if (is_int32 || view.value_kind == DecodedValueKind::INT64) {
+        // Widen either integer width to Int128 so one range check covers both.
+        const auto widened = static_cast<Int128>(
+                is_int32 ? int64_t {reinterpret_cast<const int32_t*>(view.values)[row]}
+                         : reinterpret_cast<const int64_t*>(view.values)[row]);
+        if (!decoded_decimal_int_value_fits<T>(widened, precision)) {
+            return Status::DataQualityError("Decoded decimal value is out of range");
+        }
+        native_value = NativeType(widened);
+    } else {
+        // Every other kind is read as a big-endian signed integer from the binary values.
+        const auto& bytes = (*view.binary_values)[row];
+        const bool is_fixed = view.value_kind == DecodedValueKind::FIXED_BINARY;
+        const auto length = is_fixed ? view.fixed_length : cast_set<int, size_t, false>(bytes.size);
+        if (length > static_cast<int>(sizeof(NativeType))) {
+            return Status::DataQualityError("Decoded decimal binary value is too wide: length={}",
+                                            length);
+        }
+        native_value = decode_big_endian_signed_integer<NativeType>(
+                reinterpret_cast<const uint8_t*>(bytes.data), length);
+    }
+    // Final precision check on the materialized native value.
+    if (!decoded_decimal_value_fits<T>(native_value, precision)) {
+        return Status::DataQualityError("Decoded decimal value is out of range");
+    }
+    *result = FieldType {native_value};
+    return Status::OK();
+}
+
+template <PrimitiveType T>
+Status read_decimal_decoded_values(IColumn& column, const DecodedColumnView& view,
+                                   UInt32 precision) {
+    const bool is_fixed_binary = view.value_kind == DecodedValueKind::FIXED_BINARY;
+    if (view.value_kind == DecodedValueKind::INT32 || view.value_kind == DecodedValueKind::INT64) {
+        if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+            return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+        }
+    } else if (view.binary_values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded binary values are null for {}", column.get_name());
+    }
+    auto& data = assert_cast<ColumnDecimal<T>&>(column).get_data();
+    const auto old_size = data.size(); // rollback point for hard failures
+    for (int64_t row = 0; row < view.row_count; ++row) { // one appended slot per view row
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(typename PrimitiveTypeTraits<T>::CppType());
+            continue;
+        }
+        if (is_fixed_binary || view.value_kind == DecodedValueKind::BINARY) {
+            const auto& value = (*view.binary_values)[row];
+            const auto length =
+                    is_fixed_binary ? view.fixed_length : cast_set<int, size_t, false>(value.size);
+            if (value.data == nullptr && length > 0) {
+                if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                    decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                    continue;
+                }
+                data.resize(old_size); // drop rows appended by this call, as the path below does
+                return Status::Corruption("Decoded decimal binary value is null for {} at row {}",
+                                          column.get_name(), row);
+            }
+        }
+        typename PrimitiveTypeTraits<T>::CppType value;
+        auto st = read_decimal_decoded_value<T>(view, precision, row, &value);
+        if (!st.ok()) {
+            if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                continue;
+            }
+            data.resize(old_size);
+            st.prepend(fmt::format(
+                    "Failed to decode decimal value for {} at row {}: ", column.get_name(), row));
+            return st;
+        }
+        data.push_back(value);
+    }
+    return Status::OK();
+}
+
+} // namespace
 
 template <PrimitiveType T>
 Status DataTypeDecimalSerDe<T>::from_string_batch(const ColumnString& str, ColumnNullable& column,
@@ -371,6 +505,24 @@ Status DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column,
     return Status::OK();
 }
 
+template <PrimitiveType T>
+Status DataTypeDecimalSerDe<T>::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    // Supported decimal targets decode from integer or binary sources; the rest is a mismatch.
+    if constexpr (T == TYPE_DECIMAL32 || T == TYPE_DECIMAL64 || T == TYPE_DECIMAL128I ||
+                  T == TYPE_DECIMAL256) {
+        const auto kind = view.value_kind;
+        if (kind == DecodedValueKind::INT32 || kind == DecodedValueKind::INT64 ||
+            kind == DecodedValueKind::BINARY || kind == DecodedValueKind::FIXED_BINARY) {
+            return read_decimal_decoded_values<T>(column, view, precision);
+        }
+    }
+    const auto mismatch =
+            Status::NotSupported("Unsupported decoded values for {} from source kind {}",
+                                 get_name(), static_cast<int>(view.value_kind));
+    return decoded_column_view_handle_conversion_failure(column, view, mismatch);
+}
+
 template <PrimitiveType T>
 Status DataTypeDecimalSerDe<T>::write_column_to_mysql_binary(const IColumn& column,
                                                              MysqlRowBinaryBuffer& result,
diff --git a/be/src/core/data_type_serde/data_type_decimal_serde.h b/be/src/core/data_type_serde/data_type_decimal_serde.h
index 0185672e024718..089835a21be955 100644
--- a/be/src/core/data_type_serde/data_type_decimal_serde.h
+++ b/be/src/core/data_type_serde/data_type_decimal_serde.h
@@ -107,6 +107,8 @@ class DataTypeDecimalSerDe : public DataTypeSerDe {
                                  const cctz::time_zone& ctz) const override;
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override;
diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.cpp b/be/src/core/data_type_serde/data_type_nullable_serde.cpp
index a93f8d6126c7d5..7c6ce46e1cd960 100644
--- a/be/src/core/data_type_serde/data_type_nullable_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_nullable_serde.cpp
@@ -22,7 +22,7 @@
 
 #include <algorithm>
 #include <boost/iterator/iterator_facade.hpp>
-#include <memory>
+#include <vector>
 
 #include "core/assert_cast.h"
 #include "core/column/column.h"
@@ -31,10 +31,12 @@
 #include "core/column/column_vector.h"
 #include "core/data_type_serde/data_type_serde.h"
 #include "core/data_type_serde/data_type_string_serde.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "exprs/function/cast/cast_base.h"
 #include "format/transformer/vcsv_transformer.h"
 #include "util/jsonb_document.h"
 #include "util/jsonb_writer.h"
+#include "util/simd/bits.h"
 
 namespace doris {
 class Arena;
@@ -350,6 +352,39 @@ Status DataTypeNullableSerDe::read_column_from_arrow(IColumn& column,
                                                 ctz);
 }
 
+Status DataTypeNullableSerDe::read_column_from_decoded_values(IColumn& column,
+                                                              const DecodedColumnView& view) const {
+    auto& nullable_column = assert_cast<ColumnNullable&>(column);
+    auto& nested_column = nullable_column.get_nested_column();
+    auto& null_map = nullable_column.get_null_map_data();
+    const auto null_map_start = null_map.size();
+    const auto nested_start = nested_column.size();
+    null_map.resize(null_map_start + view.row_count);
+    auto* appended_flags = null_map.data() + null_map_start;
+    if (view.null_map != nullptr) {
+        // TODO: skip if no null in map
+        memcpy(appended_flags, view.null_map, view.row_count);
+        if (simd::count_zero_num(reinterpret_cast<const int8_t*>(view.null_map),
+                                 view.row_count) == 0) {
+            // Every row is null: fill the nested column with defaults and skip decoding.
+            nested_column.insert_many_defaults(view.row_count);
+            return Status::OK();
+        }
+    } else {
+        memset(appended_flags, 0, view.row_count); // no source null map: no row is null
+    }
+    // Let the nested serde mark rows it cannot convert as null in this column's null map.
+    DecodedColumnView nested_view = view;
+    nested_view.conversion_failure_null_map = &null_map;
+    nested_view.conversion_failure_null_map_offset = null_map_start;
+    auto st = nested_serde->read_column_from_decoded_values(nested_column, nested_view);
+    if (!st.ok()) { // roll back everything appended by this call
+        null_map.resize(null_map_start);
+        nested_column.resize(nested_start);
+    }
+    return st;
+}
+
 bool DataTypeNullableSerDe::write_column_to_mysql_text(const IColumn& column, BufferWritable& bw,
                                                        int64_t row_idx,
                                                        const FormatOptions& options) const {
diff --git a/be/src/core/data_type_serde/data_type_nullable_serde.h b/be/src/core/data_type_serde/data_type_nullable_serde.h
index 6e069444483b87..ee1eab51941ecb 100644
--- a/be/src/core/data_type_serde/data_type_nullable_serde.h
+++ b/be/src/core/data_type_serde/data_type_nullable_serde.h
@@ -86,6 +86,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
                                  const cctz::time_zone& ctz) const override;
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override;
diff --git a/be/src/core/data_type_serde/data_type_number_serde.cpp b/be/src/core/data_type_serde/data_type_number_serde.cpp
index 2124547c2f89f1..3c99a53b5b07bf 100644
--- a/be/src/core/data_type_serde/data_type_number_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_number_serde.cpp
@@ -20,6 +20,8 @@
 #include <arrow/builder.h>
 
 #include <cstdint>
+#include <limits>
+#include <type_traits>
 
 #include "common/exception.h"
 #include "common/status.h"
@@ -27,6 +29,7 @@
 #include "core/data_type/define_primitive_type.h"
 #include "core/data_type/primitive_type.h"
 #include "core/data_type_serde/data_type_serde.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/packed_int128.h"
 #include "core/types.h"
 #include "core/value/timestamptz_value.h"
@@ -43,6 +46,137 @@
 #include "util/unaligned.h"
 
 namespace doris {
+namespace {
+
+template <typename NativeType> // element type the decoded value buffer is viewed as
+const NativeType* decoded_values_as(const DecodedColumnView& view) {
+    return reinterpret_cast<const NativeType*>(view.values); // reinterprets view.values; no copy
+}
+
+// Returns whether `value` can be stored in DorisCppType:
+//  - floating-point destinations accept everything;
+//  - UInt8 destinations accept only 0 and 1;
+//  - every other destination is range-checked in a 128-bit domain.
+template <typename DorisCppType, typename SourceType>
+bool decoded_number_value_fits(SourceType value) {
+    if constexpr (std::is_floating_point_v<DorisCppType>) {
+        return true;
+    } else if constexpr (std::is_same_v<DorisCppType, UInt8>) {
+        return value == SourceType(0) || value == SourceType(1);
+    } else if constexpr (std::is_signed_v<SourceType>) {
+        const auto int128_value = static_cast<Int128>(value);
+        return int128_value >= static_cast<Int128>(std::numeric_limits<DorisCppType>::lowest()) &&
+               int128_value <= static_cast<Int128>(std::numeric_limits<DorisCppType>::max());
+    } else {
+        // An unsigned source can only exceed the upper bound, for signed and unsigned
+        // destinations alike, so a single comparison is enough.
+        return static_cast<unsigned __int128>(value) <=
+               static_cast<unsigned __int128>(std::numeric_limits<DorisCppType>::max());
+    }
+}
+
+// Appends one value per view row to `column`, casting SourceType to the Doris C++ type.
+// Values that do not fit are nulled if the view allows it; otherwise the append is undone.
+template <PrimitiveType DorisType, typename SourceType>
+Status read_number_decoded_values(IColumn& column, const DecodedColumnView& view) {
+    using DorisCppType = typename PrimitiveTypeTraits<DorisType>::CppType;
+    using DorisColumnType = typename PrimitiveTypeTraits<DorisType>::ColumnType;
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data = assert_cast<DorisColumnType&>(column).get_data();
+    const auto rollback_size = data.size();
+    const auto* source = decoded_values_as<SourceType>(view);
+    const bool null_on_failure = decoded_column_view_can_null_on_conversion_failure(view);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(DorisCppType());
+        } else if (decoded_number_value_fits<DorisCppType>(source[row])) {
+            data.push_back(static_cast<DorisCppType>(source[row]));
+        } else if (null_on_failure) {
+            decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+        } else {
+            data.resize(rollback_size);
+            return Status::DataQualityError("Decoded value is out of range for {} at row {}",
+                                            column.get_name(), row);
+        }
+    }
+    return Status::OK();
+}
+
+// Like read_number_decoded_values, but first casts each source value to LogicalType.
+template <PrimitiveType DorisType, typename SourceType, typename LogicalType>
+Status read_logical_integer_decoded_values_as(IColumn& column, const DecodedColumnView& view) {
+    using DorisCppType = typename PrimitiveTypeTraits<DorisType>::CppType;
+    using DorisColumnType = typename PrimitiveTypeTraits<DorisType>::ColumnType;
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& data = assert_cast<DorisColumnType&>(column).get_data();
+    const auto rollback_size = data.size();
+    const auto* source = decoded_values_as<SourceType>(view);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(DorisCppType());
+            continue;
+        }
+        const auto logical = static_cast<LogicalType>(source[row]); // value at logical width
+        if (decoded_number_value_fits<DorisCppType>(logical)) {
+            data.push_back(static_cast<DorisCppType>(logical));
+        } else if (decoded_column_view_can_null_on_conversion_failure(view)) {
+            decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+        } else {
+            data.resize(rollback_size);
+            return Status::DataQualityError(
+                    "Decoded logical integer value is out of range for {} at row {}",
+                    column.get_name(), row);
+        }
+    }
+    return Status::OK();
+}
+
+template <PrimitiveType DorisType, typename SourceType>
+Status read_integer_decoded_values(IColumn& column, const DecodedColumnView& view) {
+    const auto bit_width = view.logical_integer_bit_width;
+    if (bit_width <= 0) {
+        // No logical width is set on the view: convert the physical values directly.
+        return read_number_decoded_values<DorisType, SourceType>(column, view);
+    }
+    // Each supported width selects the signed or unsigned logical type; an unsupported width
+    // yields the same error for both signedness settings.
+    const bool is_signed = view.logical_integer_is_signed;
+    switch (bit_width) {
+    case 8:
+        if (is_signed) {
+            return read_logical_integer_decoded_values_as<DorisType, SourceType, Int8>(column,
+                                                                                       view);
+        }
+        return read_logical_integer_decoded_values_as<DorisType, SourceType, UInt8>(column, view);
+    case 16:
+        if (is_signed) {
+            return read_logical_integer_decoded_values_as<DorisType, SourceType, Int16>(column,
+                                                                                        view);
+        }
+        return read_logical_integer_decoded_values_as<DorisType, SourceType, UInt16>(column, view);
+    case 32:
+        if (is_signed) {
+            return read_logical_integer_decoded_values_as<DorisType, SourceType, Int32>(column,
+                                                                                        view);
+        }
+        return read_logical_integer_decoded_values_as<DorisType, SourceType, UInt32>(column, view);
+    case 64:
+        if (is_signed) {
+            return read_logical_integer_decoded_values_as<DorisType, SourceType, Int64>(column,
+                                                                                        view);
+        }
+        return read_logical_integer_decoded_values_as<DorisType, SourceType, UInt64>(column, view);
+    default:
+        return Status::NotSupported("Unsupported decoded logical integer bit width {} for {}",
+                                    bit_width, column.get_name());
+    }
+}
+
+} // namespace
 // Type map的基本结构
 template <typename Key, typename Value, typename... Rest>
 struct TypeMap {
@@ -157,6 +291,42 @@ Status DataTypeNumberSerDe<T>::write_column_to_arrow(const IColumn& column, cons
     return Status::OK();
 }
 
+template <PrimitiveType T>
+Status DataTypeNumberSerDe<T>::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    if constexpr (T == TYPE_BOOLEAN) {
+        if (view.value_kind == DecodedValueKind::BOOL) {
+            return read_number_decoded_values<TYPE_BOOLEAN, bool>(column, view);
+        }
+    } else if constexpr (T == TYPE_TINYINT || T == TYPE_SMALLINT || T == TYPE_INT ||
+                         T == TYPE_BIGINT || T == TYPE_LARGEINT) {
+        switch (view.value_kind) { // integer targets accept any 32/64-bit integer source
+        case DecodedValueKind::INT32:
+            return read_integer_decoded_values<T, int32_t>(column, view);
+        case DecodedValueKind::UINT32:
+            return read_integer_decoded_values<T, uint32_t>(column, view);
+        case DecodedValueKind::INT64:
+            return read_integer_decoded_values<T, int64_t>(column, view);
+        case DecodedValueKind::UINT64:
+            return read_integer_decoded_values<T, uint64_t>(column, view);
+        default:
+            break;
+        }
+    } else if constexpr (T == TYPE_FLOAT) {
+        if (view.value_kind == DecodedValueKind::FLOAT) {
+            return read_number_decoded_values<TYPE_FLOAT, float>(column, view);
+        }
+    } else if constexpr (T == TYPE_DOUBLE) {
+        if (view.value_kind == DecodedValueKind::DOUBLE) {
+            return read_number_decoded_values<TYPE_DOUBLE, double>(column, view);
+        }
+    }
+    const auto mismatch =
+            Status::NotSupported("Unsupported decoded values for {} from source kind {}",
+                                 get_name(), static_cast<int>(view.value_kind));
+    return decoded_column_view_handle_conversion_failure(column, view, mismatch);
+}
+
 template <PrimitiveType T>
 Status DataTypeNumberSerDe<T>::deserialize_one_cell_from_json(IColumn& column, Slice& slice,
                                                               const FormatOptions& options) const {
diff --git a/be/src/core/data_type_serde/data_type_number_serde.h b/be/src/core/data_type_serde/data_type_number_serde.h
index b57f9f9d21298d..0e0a3acfc1aed7 100644
--- a/be/src/core/data_type_serde/data_type_number_serde.h
+++ b/be/src/core/data_type_serde/data_type_number_serde.h
@@ -117,6 +117,9 @@ class DataTypeNumberSerDe : public DataTypeSerDe {
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
 
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
+
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& row_buffer,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override;
diff --git a/be/src/core/data_type_serde/data_type_serde.cpp b/be/src/core/data_type_serde/data_type_serde.cpp
index ac688ae6c307a3..728cafab3469fd 100644
--- a/be/src/core/data_type_serde/data_type_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_serde.cpp
@@ -34,6 +34,54 @@
 namespace doris {
 DataTypeSerDe::~DataTypeSerDe() = default;
 
+bool decoded_column_view_can_null_on_conversion_failure(const DecodedColumnView& view) {
+    return view.conversion_failure_null_map != nullptr && !view.enable_strict_mode;
+}
+
+void decoded_column_view_insert_null_on_conversion_failure(IColumn& column,
+                                                           const DecodedColumnView& view,
+                                                           int64_t row) {
+    DORIS_CHECK(decoded_column_view_can_null_on_conversion_failure(view));
+    DORIS_CHECK(row >= 0); // row is an index within this view
+    DORIS_CHECK(row < view.row_count);
+    DORIS_CHECK(view.conversion_failure_null_map_offset >= 0);
+    const auto null_map_row = view.conversion_failure_null_map_offset + row; // null-map index
+    DORIS_CHECK(null_map_row >= 0);
+    DORIS_CHECK(static_cast<size_t>(null_map_row) < view.conversion_failure_null_map->size());
+    column.insert_default(); // placeholder value for the failed cell
+    (*view.conversion_failure_null_map)[null_map_row] = 1; // flag the cell in the null map
+}
+
+Status decoded_column_view_handle_conversion_failure(IColumn& column, const DecodedColumnView& view,
+                                                     const Status& status) {
+    if (decoded_column_view_can_null_on_conversion_failure(view)) {
+        for (int64_t idx = 0; idx < view.row_count; ++idx) { // null out every row of the view
+            decoded_column_view_insert_null_on_conversion_failure(column, view, idx);
+        }
+        return Status::OK();
+    }
+    return status; // strict mode or no null map: report the failure unchanged
+}
+
+Status DataTypeSerDe::read_column_from_decoded_values(IColumn& column,
+                                                      const DecodedColumnView& view) const {
+    // Base fallback: a serde without its own decoded-value reader reports NotSupported.
+    const auto unsupported = Status::NotSupported(
+            "read_column_from_decoded_values is not supported for {}", get_name());
+    return decoded_column_view_handle_conversion_failure(column, view, unsupported);
+}
+
+Status DataTypeSerDe::read_field_from_decoded_value(const IDataType& data_type, Field* field,
+                                                    const DecodedColumnView& view) const {
+    DORIS_CHECK(field != nullptr);
+    DORIS_CHECK(view.row_count == 1); // exactly one decoded value is expected
+    auto column = data_type.create_column(); // scratch column of the requested type
+    RETURN_IF_ERROR(read_column_from_decoded_values(*column, view));
+    DORIS_CHECK(column->size() == 1);
+    column->get(0, *field); // copy the single cell out as a Field
+    return Status::OK();
+}
+
 DataTypeSerDeSPtrs create_data_type_serdes(const DataTypes& types) {
     DataTypeSerDeSPtrs serdes;
     serdes.reserve(types.size());
diff --git a/be/src/core/data_type_serde/data_type_serde.h b/be/src/core/data_type_serde/data_type_serde.h
index eb7ce74fbe7e9c..baab90ea8d2a82 100644
--- a/be/src/core/data_type_serde/data_type_serde.h
+++ b/be/src/core/data_type_serde/data_type_serde.h
@@ -27,6 +27,7 @@
 #include "common/cast_set.h"
 #include "common/status.h"
 #include "core/column/column_nullable.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/field.h"
 #include "core/string_buffer.hpp"
 #include "core/types.h"
@@ -485,6 +486,14 @@ class DataTypeSerDe {
                                           int64_t start, int64_t end,
                                           const cctz::time_zone& ctz) const = 0;
 
+    // Materialize already-decoded values into a Doris column. The view is format-neutral: file
+    // readers translate decoder output into DecodedColumnView and SerDe owns the type-specific
+    // conversion into IColumn. The Field variant reads a single-row view (row_count == 1).
+    virtual Status read_column_from_decoded_values(IColumn& column,
+                                                   const DecodedColumnView& view) const;
+    virtual Status read_field_from_decoded_value(const IDataType& data_type, Field* field,
+                                                 const DecodedColumnView& view) const;
+
     // ORC serializer
     virtual Status write_column_to_orc(const std::string& timezone, const IColumn& column,
                                        const NullMap* null_map,
diff --git a/be/src/core/data_type_serde/data_type_string_serde.cpp b/be/src/core/data_type_serde/data_type_string_serde.cpp
index dc7667fefcaf48..4c7c9d02475bf0 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_string_serde.cpp
@@ -22,11 +22,40 @@
 
 #include "core/column/column_string.h"
 #include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "util/jsonb_document_cast.h"
 #include "util/jsonb_utils.h"
 #include "util/jsonb_writer.h"
 
 namespace doris {
+namespace {
+
+template <typename ColumnType>
+Status read_string_decoded_values(IColumn& column, const DecodedColumnView& view) {
+    if (view.binary_values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded binary values are null for {}", column.get_name());
+    } // a missing value vector is tolerated only when every row is NULL
+    auto& string_column = assert_cast<ColumnType&>(column);
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            string_column.insert_default(); // NULL row: default value keeps rows aligned
+            continue;
+        }
+        const auto& value = (*view.binary_values)[row]; // indexed by row, NULL rows included
+        if (value.data == nullptr && value.size > 0) { // non-empty value without bytes
+            if (decoded_column_view_can_null_on_conversion_failure(view)) {
+                decoded_column_view_insert_null_on_conversion_failure(column, view, row);
+                continue;
+            }
+            return Status::Corruption("Decoded string binary value is null for {} at row {}",
+                                      column.get_name(), row);
+        }
+        string_column.insert_data(value.data, value.size);
+    }
+    return Status::OK();
+}
+
+} // namespace
 
 namespace {
 
@@ -429,6 +458,19 @@ Status DataTypeStringSerDeBase<ColumnType>::read_column_from_arrow(
     return Status::OK();
 }
 
+template <typename ColumnType>
+Status DataTypeStringSerDeBase<ColumnType>::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    const auto kind = view.value_kind; // only BINARY / FIXED_BINARY sources are readable here
+    if (kind == DecodedValueKind::BINARY || kind == DecodedValueKind::FIXED_BINARY) {
+        return read_string_decoded_values<ColumnType>(column, view);
+    }
+    const auto unsupported =
+            Status::NotSupported("Unsupported decoded values for {} from source kind {}",
+                                 get_name(), static_cast<int>(kind));
+    return decoded_column_view_handle_conversion_failure(column, view, unsupported);
+}
+
 template <typename ColumnType>
 Status DataTypeStringSerDeBase<ColumnType>::write_column_to_orc(
         const std::string& timezone, const IColumn& column, const NullMap* null_map,
diff --git a/be/src/core/data_type_serde/data_type_string_serde.h b/be/src/core/data_type_serde/data_type_string_serde.h
index 79c8450835d39c..81b80eab4a5cbf 100644
--- a/be/src/core/data_type_serde/data_type_string_serde.h
+++ b/be/src/core/data_type_serde/data_type_string_serde.h
@@ -203,6 +203,9 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
     Status read_column_from_arrow(IColumn& column, const arrow::Array* arrow_array, int64_t start,
                                   int64_t end, const cctz::time_zone& ctz) const override;
 
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
+
     Status write_column_to_mysql_binary(const IColumn& column, MysqlRowBinaryBuffer& result,
                                         int64_t row_idx, bool col_const,
                                         const FormatOptions& options) const override {
diff --git a/be/src/core/data_type_serde/data_type_time_serde.cpp b/be/src/core/data_type_serde/data_type_time_serde.cpp
index e57fd08a271339..c40e671793c848 100644
--- a/be/src/core/data_type_serde/data_type_time_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_time_serde.cpp
@@ -20,11 +20,38 @@
 #include "core/data_type/data_type_decimal.h"
 #include "core/data_type/data_type_number.h"
 #include "core/data_type/primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/value/time_value.h"
 #include "exprs/function/cast/cast_base.h"
 #include "exprs/function/cast/cast_to_time_impl.hpp"
 
 namespace doris {
+namespace {
+
+TimeValue::TimeType read_time_decoded_value(const DecodedColumnView& view, int64_t row) {
+    int64_t micros = 0; // value normalized to microseconds
+    if (view.value_kind == DecodedValueKind::INT32) {
+        const auto* values = reinterpret_cast<const int32_t*>(view.values);
+        micros = static_cast<int64_t>(values[row]) * 1000; // INT32 is scaled as milliseconds
+    } else {
+        const auto* values = reinterpret_cast<const int64_t*>(view.values);
+        micros = values[row]; // MICROS and UNKNOWN units are used as-is
+        if (view.time_unit == DecodedTimeUnit::MILLIS) {
+            micros *= 1000;
+        } else if (view.time_unit == DecodedTimeUnit::NANOS) {
+            micros /= 1000; // truncates sub-microsecond digits
+        }
+    }
+    const bool negative = micros < 0;
+    const int64_t abs_micros = std::abs(micros); // NOTE(review): INT64_MIN is not representable
+    return TimeValue::make_time(
+            abs_micros / TimeValue::ONE_HOUR_MICROSECONDS,
+            (abs_micros % TimeValue::ONE_HOUR_MICROSECONDS) / TimeValue::ONE_MINUTE_MICROSECONDS,
+            (abs_micros % TimeValue::ONE_MINUTE_MICROSECONDS) / TimeValue::ONE_SECOND_MICROSECONDS,
+            abs_micros % TimeValue::ONE_SECOND_MICROSECONDS, negative);
+}
+
+} // namespace
 
 Status DataTypeTimeV2SerDe::write_column_to_mysql_binary(const IColumn& column,
                                                          MysqlRowBinaryBuffer& result,
@@ -145,6 +172,27 @@ Status DataTypeTimeV2SerDe::from_string_strict_mode(StringRef& str, IColumn& col
     return Status::OK();
 }
 
+Status DataTypeTimeV2SerDe::read_column_from_decoded_values(IColumn& column,
+                                                            const DecodedColumnView& view) const {
+    const bool integer_source = view.value_kind == DecodedValueKind::INT32 ||
+                                view.value_kind == DecodedValueKind::INT64;
+    if (!integer_source) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view,
+                Status::NotSupported("TIMEV2 decoded reader expects INT32 or INT64 source"));
+    }
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+    auto& times = assert_cast<ColumnTimeV2&>(column).get_data();
+    for (int64_t idx = 0; idx < view.row_count; ++idx) {
+        // NULL rows still occupy a slot, filled with a default time value.
+        const bool is_null = decoded_column_view_row_is_null(view, idx);
+        times.push_back(is_null ? TimeValue::TimeType() : read_time_decoded_value(view, idx));
+    }
+    return Status::OK();
+}
+
 template <typename IntDataType>
 Status DataTypeTimeV2SerDe::from_int_batch(const typename IntDataType::ColumnType& int_col,
                                            ColumnNullable& target_col) const {
diff --git a/be/src/core/data_type_serde/data_type_time_serde.h b/be/src/core/data_type_serde/data_type_time_serde.h
index db703616b497cf..e3fccf379c913a 100644
--- a/be/src/core/data_type_serde/data_type_time_serde.h
+++ b/be/src/core/data_type_serde/data_type_time_serde.h
@@ -67,6 +67,8 @@ class DataTypeTimeV2SerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_TIMEV
     template <typename DecimalDataType>
     Status from_decimal_strict_mode_batch(const typename DecimalDataType::ColumnType& decimal_col,
                                           IColumn& target_col) const;
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
     int get_scale() const override { return _scale; }
 
 protected:
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
index e8c26f6db68e75..abc8b86700023a 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.cpp
@@ -18,14 +18,64 @@
 #include "core/data_type_serde/data_type_timestamptz_serde.h"
 
 #include <arrow/builder.h>
+#include <cctz/time_zone.h>
 
 #include "core/data_type/primitive_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/value/timestamptz_value.h"
 #include "exprs/function/cast/cast_parameters.h"
 #include "exprs/function/cast/cast_to_string.h"
 #include "exprs/function/cast/cast_to_timestamptz.h"
 namespace doris {
 
+namespace {
+
+#pragma pack(1)
+struct DecodedInt96Timestamp {
+    int64_t nanos_of_day; // nanoseconds within the day
+    int32_t julian_day; // Julian day number
+
+    int64_t to_timestamp_micros() const {
+        static constexpr int32_t JULIAN_EPOCH_OFFSET_DAYS = 2440588; // Julian day of 1970-01-01
+        static constexpr int64_t MICROS_IN_DAY = 86400000000;
+        static constexpr int64_t NANOS_PER_MICROSECOND = 1000;
+        return (julian_day - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY +
+               nanos_of_day / NANOS_PER_MICROSECOND; // sub-microsecond digits are dropped
+    }
+};
+#pragma pack()
+static_assert(sizeof(DecodedInt96Timestamp) == 12); // 8 + 4 bytes, no padding
+
+void append_timestamptz_from_utc_epoch_micros(ColumnTimeStampTz::Container& data,
+                                              int64_t timestamp_micros) {
+    static constexpr int64_t MICROS_PER_SECOND = 1000000;
+    static const auto UTC = cctz::utc_time_zone();
+
+    int64_t epoch_seconds = timestamp_micros / MICROS_PER_SECOND;
+    int64_t micros_of_second = timestamp_micros % MICROS_PER_SECOND;
+    if (micros_of_second < 0) { // pre-epoch value: turn truncation into floor division
+        micros_of_second += MICROS_PER_SECOND;
+        --epoch_seconds;
+    }
+
+    TimestampTzValue timestamp_tz;
+    timestamp_tz.from_unixtime(epoch_seconds, UTC);
+    timestamp_tz.set_microsecond(static_cast<uint32_t>(micros_of_second)); // now in [0, 1e6)
+    data.push_back(timestamp_tz);
+}
+
+int64_t decoded_timestamp_micros(const DecodedColumnView& view, int64_t value) {
+    // Normalize to microseconds; MICROS and UNKNOWN values pass through unchanged.
+    const bool is_millis = view.time_unit == DecodedTimeUnit::MILLIS;
+    const bool is_nanos = view.time_unit == DecodedTimeUnit::NANOS;
+    if (is_millis) {
+        return value * 1000;
+    }
+    return is_nanos ? value / 1000 : value;
+}
+
+} // namespace
+
 // The implementation of these functions mainly refers to data_type_datetimev2_serde.cpp
 
 Status DataTypeTimeStampTzSerDe::from_string(StringRef& str, IColumn& column,
@@ -246,6 +296,41 @@ Status DataTypeTimeStampTzSerDe::write_column_to_orc(const std::string& timezone
     return Status::OK();
 }
 
+Status DataTypeTimeStampTzSerDe::read_column_from_decoded_values(
+        IColumn& column, const DecodedColumnView& view) const {
+    if (view.value_kind != DecodedValueKind::INT64 && view.value_kind != DecodedValueKind::INT96) {
+        return decoded_column_view_handle_conversion_failure(
+                column, view,
+                Status::NotSupported("TIMESTAMPTZ decoded reader expects INT64 or INT96 source"));
+    }
+    if (view.values == nullptr && decoded_column_view_has_non_null_value(view)) {
+        return Status::Corruption("Decoded value buffer is null for {}", column.get_name());
+    }
+
+    auto& data = assert_cast<ColumnTimeStampTz&>(column).get_data();
+    if (view.value_kind == DecodedValueKind::INT96) { // 12-byte packed timestamps
+        const auto* values = reinterpret_cast<const DecodedInt96Timestamp*>(view.values);
+        for (int64_t row = 0; row < view.row_count; ++row) {
+            if (decoded_column_view_row_is_null(view, row)) {
+                data.push_back(TimestampTzValue()); // NULL row placeholder
+                continue;
+            }
+            append_timestamptz_from_utc_epoch_micros(data, values[row].to_timestamp_micros());
+        }
+        return Status::OK();
+    }
+
+    const auto* values = reinterpret_cast<const int64_t*>(view.values); // INT64 source
+    for (int64_t row = 0; row < view.row_count; ++row) {
+        if (decoded_column_view_row_is_null(view, row)) {
+            data.push_back(TimestampTzValue()); // NULL row placeholder
+            continue;
+        }
+        append_timestamptz_from_utc_epoch_micros(data, decoded_timestamp_micros(view, values[row]));
+    }
+    return Status::OK();
+}
+
 std::string DataTypeTimeStampTzSerDe::to_olap_string(const Field& field) const {
     return CastToString::from_timestamptz(field.get<TYPE_TIMESTAMPTZ>(), 6);
 }
diff --git a/be/src/core/data_type_serde/data_type_timestamptz_serde.h b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
index 0a595935d8fdd6..133e37fed33b03 100644
--- a/be/src/core/data_type_serde/data_type_timestamptz_serde.h
+++ b/be/src/core/data_type_serde/data_type_timestamptz_serde.h
@@ -22,6 +22,7 @@
 #include <cstdint>
 
 #include "core/data_type_serde/data_type_number_serde.h"
+#include "core/data_type_serde/decoded_column_view.h"
 #include "core/types.h"
 #include "core/value/time_value.h"
 
@@ -72,6 +73,9 @@ class DataTypeTimeStampTzSerDe : public DataTypeNumberSerDe<PrimitiveType::TYPE_
                                int64_t start, int64_t end, Arena& arena,
                                const FormatOptions& options) const override;
 
+    Status read_column_from_decoded_values(IColumn& column,
+                                           const DecodedColumnView& view) const override;
+
     // Override needed: paired reader skips a scale byte; the inherited number-serde writer omits it.
     void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
                                   int64_t row_num) const override;
diff --git a/be/src/core/data_type_serde/decoded_column_view.h b/be/src/core/data_type_serde/decoded_column_view.h
new file mode 100644
index 00000000000000..9f40f1513dbf93
--- /dev/null
+++ b/be/src/core/data_type_serde/decoded_column_view.h
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <vector>
+
+#include "common/status.h"
+#include "core/column/column_nullable.h"
+#include "core/string_ref.h"
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris {
+
+class IColumn;
+
+// Physical source kind of the values in a decoded column batch.
+// This enum only describes generic memory layouts; it has no Parquet/ORC/Arrow-specific types.
+enum class DecodedValueKind {
+    BOOL,
+    INT32,
+    UINT32,
+    INT64,
+    UINT64,
+    INT96,
+    FLOAT,
+    DOUBLE,
+    BINARY,
+    FIXED_BINARY,
+};
+
+enum class DecodedTimeUnit {
+    UNKNOWN, // default of DecodedColumnView::time_unit
+    MILLIS,
+    MICROS,
+    NANOS,
+};
+
+struct DecodedColumnView {
+    DecodedValueKind value_kind = DecodedValueKind::INT32; // physical layout of `values`
+    DecodedTimeUnit time_unit = DecodedTimeUnit::UNKNOWN;
+    int64_t row_count = 0; // number of rows, NULL rows included
+    // Optional logical integer annotation. value_kind still describes the physical buffer layout.
+    int logical_integer_bit_width = -1;
+    int decimal_precision = -1;
+    int decimal_scale = -1;
+    int fixed_length = -1;
+    bool logical_integer_is_signed = true;
+    bool timestamp_is_adjusted_to_utc = false;
+    const uint8_t* values = nullptr; // raw value buffer, reinterpreted by the readers
+    const uint8_t* null_map = nullptr; // optional; a non-zero entry marks a NULL row
+    const std::vector<StringRef>* binary_values = nullptr; // per-row values for binary kinds
+    const cctz::time_zone* timezone = nullptr;
+    bool enable_strict_mode = false; // when true, conversion failures are never nulled
+    NullMap* conversion_failure_null_map = nullptr; // receives 1 for each nulled failure
+    int64_t conversion_failure_null_map_offset = 0; // null-map index of this view's row 0
+};
+
+inline bool decoded_column_view_row_is_null(const DecodedColumnView& view, int64_t row) {
+    return !(view.null_map == nullptr || view.null_map[row] == 0); // no map => no NULLs
+}
+
+inline bool decoded_column_view_has_non_null_value(const DecodedColumnView& view) {
+    // Without a null map every row is a value, so any non-empty view qualifies.
+    if (view.null_map == nullptr) {
+        return view.row_count > 0;
+    }
+
+    // TODO(gabriel): optimize null map check with SIMD or bitset if needed.
+    bool found_value = false;
+    for (int64_t idx = 0; idx < view.row_count && !found_value; ++idx) {
+        found_value = view.null_map[idx] == 0;
+    }
+    return found_value;
+}
+
+bool decoded_column_view_can_null_on_conversion_failure(const DecodedColumnView& view);
+
+void decoded_column_view_insert_null_on_conversion_failure(IColumn& column,
+                                                           const DecodedColumnView& view,
+                                                           int64_t row);
+
+Status decoded_column_view_handle_conversion_failure(IColumn& column, const DecodedColumnView& view,
+                                                     const Status& status);
+
+} // namespace doris
diff --git a/be/src/exec/operator/file_scan_operator.cpp b/be/src/exec/operator/file_scan_operator.cpp
index 2a87f413a15bd6..d4035d37e27106 100644
--- a/be/src/exec/operator/file_scan_operator.cpp
+++ b/be/src/exec/operator/file_scan_operator.cpp
@@ -24,6 +24,7 @@
 #include "exec/operator/olap_scan_operator.h"
 #include "exec/operator/scan_operator.h"
 #include "exec/scan/file_scanner.h"
+#include "exec/scan/file_scanner_v2.h"
 #include "exec/scan/scanner_context.h"
 #include "format/format_common.h"
 #include "storage/storage_engine.h"
@@ -119,10 +120,32 @@ Status FileScanLocalState::_init_scanners(std::list<ScannerSPtr>* scanners) {
                      _max_scanners);
     shard_num = std::max(shard_num, 1U);
     _kv_cache = std::make_unique<ShardedKVCache>(shard_num);
+    const TFileScanRangeParams* scan_params = nullptr;
+    if (state()->get_query_ctx() != nullptr &&
+        state()->get_query_ctx()->file_scan_range_params_map.count(parent_id()) > 0) {
+        scan_params = &state()->get_query_ctx()->file_scan_range_params_map[parent_id()];
+    } else {
+        scan_params = _split_source->get_params();
+    }
+    const bool is_load =
+            state()->desc_tbl().get_tuple_descriptor(scan_params->src_tuple_id) != nullptr;
+    // TODO: Use scanner v2 for all queries.
+    const bool use_file_scanner_v2 =
+            state()->query_options().__isset.enable_file_scanner_v2 &&
+            state()->query_options().enable_file_scanner_v2 && !is_load &&
+            _split_source->all_scan_ranges_match(*scan_params, FileScannerV2::is_supported);
+    _operator_profile->add_info_string("UseScannerV2", use_file_scanner_v2 ? "true" : "false");
     for (int i = 0; i < _max_scanners; ++i) {
-        std::unique_ptr<FileScanner> scanner = FileScanner::create_unique(
-                state(), this, p._limit, _split_source, _scanner_profile.get(), _kv_cache.get(),
-                &p._colname_to_slot_id);
+        ScannerSPtr scanner;
+        if (use_file_scanner_v2) {
+            scanner = FileScannerV2::create_shared(state(), this, p._limit, _split_source,
+                                                   _scanner_profile.get(), _kv_cache.get(),
+                                                   &p._colname_to_slot_id);
+        } else {
+            scanner = FileScanner::create_shared(state(), this, p._limit, _split_source,
+                                                 _scanner_profile.get(), _kv_cache.get(),
+                                                 &p._colname_to_slot_id);
+        }
         RETURN_IF_ERROR(scanner->init(state(), _conjuncts));
         scanners->push_back(std::move(scanner));
     }
diff --git a/be/src/exec/operator/file_scan_operator.h b/be/src/exec/operator/file_scan_operator.h
index d4e31195a4459a..c47488fa357c77 100644
--- a/be/src/exec/operator/file_scan_operator.h
+++ b/be/src/exec/operator/file_scan_operator.h
@@ -29,6 +29,7 @@
 
 namespace doris {
 class FileScanner;
+class FileScannerV2;
 } // namespace doris
 
 namespace doris {
@@ -56,6 +57,7 @@ class FileScanLocalState final : public ScanLocalState<FileScanLocalState> {
 
 private:
     friend class FileScanner;
+    friend class FileScannerV2;
     PushDownType _should_push_down_bloom_filter() const override {
         return PushDownType::UNACCEPTABLE;
     }
diff --git a/be/src/exec/operator/result_sink_operator.h b/be/src/exec/operator/result_sink_operator.h
index 4ead2985d85162..7ff4b18c9b2817 100644
--- a/be/src/exec/operator/result_sink_operator.h
+++ b/be/src/exec/operator/result_sink_operator.h
@@ -45,7 +45,7 @@ struct ResultFileOptions {
     TParquetCompressionType::type parquet_commpression_type;
     TParquetVersion::type parquet_version;
     bool parquert_disable_dictionary = false;
-    bool enable_int96_timestamps = false;
+    bool enable_int96_timestamps = true;
     //note: use outfile with parquet format, have deprecated 9:schema and 10:file_properties
     //But in order to consider the compatibility when upgrading, so add a bool to check
     //Now the code version is 1.1.2, so when the version is after 1.2, could remove this code.
diff --git a/be/src/exec/scan/access_path_parser.cpp b/be/src/exec/scan/access_path_parser.cpp
new file mode 100644
index 00000000000000..b215212b6d861b
--- /dev/null
+++ b/be/src/exec/scan/access_path_parser.cpp
@@ -0,0 +1,479 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/scan/access_path_parser.h"
+
+#include <fmt/format.h>
+
+#include <algorithm>
+#include <charconv>
+#include <map>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "runtime/descriptors.h"
+#include "util/string_util.h"
+
+namespace doris {
+namespace {
+
+bool is_scanner_materialized_virtual_column(const std::string& column_name) {
+    return BeConsts::ICEBERG_ROWID_COL == column_name; // Iceberg row-id column
+}
+
+bool parse_non_negative_int(std::string_view value, int32_t* result) {
+    DORIS_CHECK(result != nullptr);
+    int32_t parsed = -1;
+    const auto* const end = value.data() + value.size();
+    const auto [ptr, ec] = std::from_chars(value.data(), end, parsed);
+    // from_chars accepts a leading '-', so "-0" parses to 0; reject any signed spelling.
+    if (ec != std::errc() || ptr != end || parsed < 0 || value.front() == '-') {
+        return false;
+    }
+    *result = parsed; // *result is only written on success
+    return true;
+}
+
+std::string access_path_to_string(const std::vector<std::string>& path) {
+    return fmt::format("{}", fmt::join(path, ".")); // components joined with '.'
+}
+
+format::ColumnDefinition* find_or_add_child(format::ColumnDefinition* parent, int32_t id,
+                                            std::string name, DataTypePtr type) {
+    DORIS_CHECK(parent != nullptr);
+    for (auto& child : parent->children) { // reuse a child matching by field id or by name
+        if ((child.has_identifier_field_id() && child.get_identifier_field_id() == id) ||
+            child.name == name) {
+            return &child;
+        }
+    }
+    parent->children.push_back({
+            .identifier = Field::create_field<TYPE_INT>(id), // id is stored as an INT field
+            .name = std::move(name),
+            .type = std::move(type),
+            .children = {},
+            .default_expr = nullptr,
+            .is_partition_key = false,
+    });
+    return &parent->children.back(); // NOTE(review): presumably unstable once children grows
+}
+
+void inherit_schema_metadata(format::ColumnDefinition* column,
+                             const format::ColumnDefinition* schema_column) {
+    // Copies the name mapping only; a missing column on either side makes this a no-op.
+    if (column != nullptr && schema_column != nullptr) {
+        column->name_mapping = schema_column->name_mapping;
+    }
+}
+
+// Finds the schema child addressed by one access-path component (field id, name, or alias).
+const format::ColumnDefinition* find_schema_child_by_path(
+        const format::ColumnDefinition* schema_column, const std::string& child_path) {
+    if (schema_column == nullptr) {
+        return nullptr;
+    }
+    const auto& children = schema_column->children;
+    int32_t parsed_field_id = -1;
+    if (parse_non_negative_int(child_path, &parsed_field_id)) {
+        // A purely numeric component is a field id and is never retried as a name.
+        const auto by_id = std::ranges::find_if(children, [&](const auto& child) {
+            return child.has_identifier_field_id() &&
+                   child.get_identifier_field_id() == parsed_field_id;
+        });
+        return by_id == children.end() ? nullptr : &*by_id;
+    }
+    // Lower-case the requested path once instead of once per child and per alias.
+    const auto wanted = to_lower(child_path);
+    const auto matches = [&](const std::string& name) { return to_lower(name) == wanted; };
+    const auto by_name = std::ranges::find_if(children, [&](const auto& child) {
+        return matches(child.name) || std::ranges::any_of(child.name_mapping, matches);
+    });
+    return by_name == children.end() ? nullptr : &*by_name;
+}
+
+int32_t schema_field_id(const format::ColumnDefinition* schema_column) {
+    // Returns -1 when there is no schema column or when its identifier
+    // does not carry a field id.
+    const bool has_id = schema_column != nullptr && schema_column->has_identifier_field_id();
+    return has_id ? schema_column->get_identifier_field_id() : -1;
+}
+
+int32_t schema_field_id_or(const format::ColumnDefinition* schema_column, int32_t fallback) {
+    const int32_t resolved = schema_field_id(schema_column); // negative when unavailable
+    return resolved < 0 ? fallback : resolved;
+}
+
+std::string schema_field_name_or(const format::ColumnDefinition* schema_column,
+                                 std::string fallback) {
+    const bool use_schema_name = schema_column != nullptr && !schema_column->name.empty();
+    return use_schema_name ? schema_column->name : std::move(fallback);
+}
+
+struct AccessPathNode {
+    bool project_all = false; // whole subtree is projected; children is cleared when set
+    std::map<std::string, AccessPathNode> children; // keyed by access-path component
+};
+
+void merge_access_path_node(AccessPathNode* dst, const AccessPathNode& src) {
+    DORIS_CHECK(dst != nullptr);
+    if (dst->project_all) { // already projects everything below this node
+        return;
+    }
+    if (!src.project_all) {
+        for (const auto& [component, sub_node] : src.children) {
+            merge_access_path_node(&dst->children[component], sub_node);
+        }
+        return;
+    }
+    dst->project_all = true; // a full projection supersedes any partial children
+    dst->children.clear();
+}
+
+void insert_access_path(AccessPathNode* root, const std::vector<std::string>& path,
+                        size_t path_idx) {
+    DORIS_CHECK(root != nullptr);
+    if (root->project_all) { // deeper paths add nothing to a full projection
+        return;
+    }
+    if (path_idx < path.size()) {
+        insert_access_path(&root->children[path[path_idx]], path, path_idx + 1);
+        return;
+    }
+    root->project_all = true; // path exhausted: project this whole subtree
+    root->children.clear();
+}
+
+Status build_nested_children_from_access_node(format::ColumnDefinition* column,
+                                              const DataTypePtr& type, const AccessPathNode& node,
+                                              const std::string& path,
+                                              const format::ColumnDefinition* schema_column);
+
+// Expand a full complex-column projection into table-schema children when the table format provides
+// an external/current schema. Without this, `SELECT complex_col` or `SELECT *` leaves
+// ColumnDefinition::children empty, so ColumnMapper treats the root complex column as a scalar
+// mapping and later tries to cast the old file shape to the current table shape directly.
+//
+// Examples:
+//   - STRUCT country/city projected from an old file STRUCT country/population/location should
+//     create children country and city, so city can be materialized as missing/default.
+//   - ARRAY<STRUCT<item, quantity>> should create the array element wrapper and then the element
+//     struct children item and quantity.
+//   - MAP<STRING, STRUCT<full_name, age>> should create semantic children key/value directly, then
+//     expand the value struct children full_name and age. Do not introduce a physical entries
+//     wrapper here: ColumnMapper and TableReader treat MAP children as [key, value].
+Status build_all_nested_children_from_schema(format::ColumnDefinition* column,
+                                             const DataTypePtr& type, const std::string& path,
+                                             const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+
+    // Every child created here is fully projected as well, so one shared node serves them all.
+    AccessPathNode everything;
+    everything.project_all = true;
+    // Positional lookup into the table-schema children; nullptr when there is no such child.
+    const auto schema_child_at = [schema_column](size_t idx) {
+        return schema_column != nullptr && idx < schema_column->children.size()
+                       ? &schema_column->children[idx]
+                       : nullptr;
+    };
+    const auto value_type = remove_nullable(type);
+    switch (value_type->get_primitive_type()) {
+    case TYPE_STRUCT: {
+        const auto& as_struct = assert_cast<const DataTypeStruct&>(*value_type);
+        for (size_t pos = 0; pos < as_struct.get_elements().size(); ++pos) {
+            const auto declared_name = as_struct.get_element_name(pos);
+            const auto* field_schema = find_schema_child_by_path(schema_column, declared_name);
+            auto* field = find_or_add_child(
+                    column, schema_field_id_or(field_schema, cast_set<int32_t>(pos)),
+                    schema_field_name_or(field_schema, declared_name), as_struct.get_element(pos));
+            inherit_schema_metadata(field, field_schema);
+            RETURN_IF_ERROR(build_nested_children_from_access_node(
+                    field, field->type, everything, path + "." + field->name, field_schema));
+        }
+        return Status::OK();
+    }
+    case TYPE_ARRAY: {
+        const auto& as_array = assert_cast<const DataTypeArray&>(*value_type);
+        const auto* element_schema = schema_child_at(0);
+        auto* item = find_or_add_child(column, schema_field_id_or(element_schema, 0), "element",
+                                       as_array.get_nested_type());
+        inherit_schema_metadata(item, element_schema);
+        return build_nested_children_from_access_node(item, item->type, everything, path + ".*",
+                                                      element_schema);
+    }
+    case TYPE_MAP: {
+        const auto& as_map = assert_cast<const DataTypeMap&>(*value_type);
+        const auto* key_schema = schema_child_at(0);
+        const auto* value_schema = schema_child_at(1);
+        auto* key_col = find_or_add_child(column, schema_field_id_or(key_schema, 0), "key",
+                                          as_map.get_key_type());
+        inherit_schema_metadata(key_col, key_schema);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(
+                key_col, key_col->type, everything, path + ".KEYS", key_schema));
+        auto* value_col = find_or_add_child(column, schema_field_id_or(value_schema, 1), "value",
+                                            as_map.get_value_type());
+        inherit_schema_metadata(value_col, value_schema);
+        return build_nested_children_from_access_node(value_col, value_col->type, everything,
+                                                      path + ".VALUES", value_schema);
+    }
+    default:
+        // Any other type has no nested children to describe.
+        return Status::OK();
+    }
+}
+
+Status build_struct_children_from_access_node(format::ColumnDefinition* column,
+                                              const DataTypeStruct& struct_type,
+                                              const AccessPathNode& node, const std::string& path,
+                                              const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    const auto unsupported = [&](const std::string& token) {
+        return Status::NotSupported(
+                "AccessPathParser does not support access path {} for slot {}",
+                path + "." + token, column->name);
+    };
+    for (const auto& [token, sub_node] : node.children) {
+        // Struct children are addressed by name or by schema field id only. A numeric token is
+        // never read as a struct ordinal: `col.0` would become ambiguous once the struct
+        // evolves, so positional access needs its own design if it is ever required.
+        if (token == "OFFSET" || token == "*" || token == "KEYS" || token == "VALUES") {
+            return unsupported(token);
+        }
+
+        // The table/schema ColumnDefinition is preferred because it carries field ids and
+        // aliases; the struct type's own names are only a fallback for formats without it.
+        const auto* field_schema = find_schema_child_by_path(schema_column, token);
+        int32_t resolved_id = schema_field_id(field_schema);
+        std::string resolved_name = field_schema == nullptr ? token : field_schema->name;
+        DataTypePtr resolved_type = field_schema == nullptr ? nullptr : field_schema->type;
+        if (resolved_id < 0 || resolved_type == nullptr) {
+            const auto wanted = to_lower(resolved_name);
+            for (size_t pos = 0; pos < struct_type.get_elements().size(); ++pos) {
+                if (to_lower(struct_type.get_element_name(pos)) == wanted) {
+                    resolved_id = cast_set<int32_t>(pos);
+                    resolved_name = struct_type.get_element_name(pos);
+                    resolved_type = struct_type.get_element(pos);
+                    break;
+                }
+            }
+        }
+        if (resolved_id < 0 || resolved_type == nullptr) {
+            return unsupported(token);
+        }
+        // TODO: For TVF Parquet files without field ids, this fallback uses the struct ordinal as
+        // the table child identifier. BY_NAME mapping should instead keep a string identifier and
+        // let TableColumnMapper resolve the file-local child id from the Parquet schema.
+        auto* field = find_or_add_child(column, resolved_id, resolved_name, resolved_type);
+        inherit_schema_metadata(field, field_schema);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(
+                field, field->type, sub_node, path + "." + token, field_schema));
+    }
+    return Status::OK();
+}
+
+// Builds the "key"/"value" children of a MAP column from the access-path tokens below it:
+//   - "KEYS" projects the key side along the given sub-path;
+//   - "VALUES" and "*" project the value side along the sub-path and the key side in full;
+//   - "OFFSET" and every other token are rejected with NotSupported.
+// A key-only request is widened to read the values in full as well (see below). No children
+// are added when `node` has no child tokens at all.
+Status build_map_children_from_access_node(format::ColumnDefinition* column,
+                                           const DataTypeMap& map_type, const AccessPathNode& node,
+                                           const std::string& path,
+                                           const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    AccessPathNode key_paths;
+    AccessPathNode value_paths;
+    bool read_keys = false;
+    bool read_values = false;
+    // Marks `target` as a full projection, dropping any narrower paths collected so far.
+    const auto project_whole = [](AccessPathNode* target) {
+        target->project_all = true;
+        target->children.clear();
+    };
+
+    for (const auto& [token, sub_node] : node.children) {
+        if (token == "KEYS") {
+            read_keys = true;
+            merge_access_path_node(&key_paths, sub_node);
+        } else if (token == "VALUES" || token == "*") {
+            // Values are never projected without their keys, and the keys are then read in full.
+            read_keys = true;
+            project_whole(&key_paths);
+            read_values = true;
+            merge_access_path_node(&value_paths, sub_node);
+        } else {
+            // "OFFSET" and any token that is not a MAP pseudo-child end up here.
+            return Status::NotSupported(
+                    "AccessPathParser does not support access path {} for slot {}",
+                    path + "." + token, column->name);
+        }
+    }
+    if (read_keys && !read_values) {
+        // A key-only MAP projection is not independently materializable yet. FileScannerV2 can
+        // describe a projection such as `m.KEYS`, but the downstream file block -> table block path
+        // still builds a ColumnMap from key column + value column + offsets. If the value child is
+        // omitted here, TableReader/ColumnMapper cannot reconstruct a valid table MAP column even
+        // though the query only needs keys.
+        //
+        // Example:
+        //   SELECT map_keys(m) FROM t;
+        // or
+        //   SELECT * FROM t WHERE array_contains(map_keys(m), 'k1');
+        //
+        // The access path only asks for `m.KEYS`, but the scan still has to read `m.VALUES` as a
+        // temporary full projection until map materialization supports constructing a table MAP
+        // from keys only.
+        read_values = true;
+        project_whole(&value_paths);
+    }
+
+    // Nothing below the MAP was requested, so there are no children to describe.
+    if (!read_keys && !read_values) {
+        return Status::OK();
+    }
+
+    // Table-schema children are positional for MAP: [0] is the key, [1] is the value.
+    const auto schema_child_at = [schema_column](size_t idx) {
+        return schema_column != nullptr && idx < schema_column->children.size()
+                       ? &schema_column->children[idx]
+                       : nullptr;
+    };
+    const auto* key_schema = schema_child_at(0);
+    const auto* value_schema = schema_child_at(1);
+    if (read_keys) {
+        auto* key_col = find_or_add_child(column, schema_field_id_or(key_schema, 0), "key",
+                                          map_type.get_key_type());
+        inherit_schema_metadata(key_col, key_schema);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(key_col, key_col->type, key_paths,
+                                                               path + ".KEYS", key_schema));
+    }
+    if (read_values) {
+        auto* value_col = find_or_add_child(column, schema_field_id_or(value_schema, 1), "value",
+                                            map_type.get_value_type());
+        inherit_schema_metadata(value_col, value_schema);
+        RETURN_IF_ERROR(build_nested_children_from_access_node(
+                value_col, value_col->type, value_paths, path + ".VALUES", value_schema));
+    }
+    return Status::OK();
+}
+
+Status build_nested_children_from_access_node(format::ColumnDefinition* column,
+                                              const DataTypePtr& type, const AccessPathNode& node,
+                                              const std::string& path,
+                                              const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    // A node without child tokens is handled exactly like an explicit full projection.
+    if (node.project_all || node.children.empty()) {
+        return build_all_nested_children_from_schema(column, type, path, schema_column);
+    }
+
+    const auto value_type = remove_nullable(type);
+    switch (value_type->get_primitive_type()) {
+    case TYPE_STRUCT:
+        return build_struct_children_from_access_node(
+                column, assert_cast<const DataTypeStruct&>(*value_type), node, path,
+                schema_column);
+    case TYPE_MAP:
+        return build_map_children_from_access_node(
+                column, assert_cast<const DataTypeMap&>(*value_type), node, path, schema_column);
+    case TYPE_ARRAY:
+        // "*" alone is the only token accepted below an array; anything else is rejected.
+        if (node.children.size() == 1 && node.children.contains("*")) {
+            break;
+        }
+        [[fallthrough]];
+    default:
+        return Status::NotSupported("AccessPathParser does not support access path {} for slot {}",
+                                    path, column->name);
+    }
+    const auto& as_array = assert_cast<const DataTypeArray&>(*value_type);
+    const auto* element_schema = schema_column != nullptr && !schema_column->children.empty()
+                                         ? &schema_column->children[0]
+                                         : nullptr;
+    auto* item = find_or_add_child(column, schema_field_id_or(element_schema, 0), "element",
+                                   as_array.get_nested_type());
+    inherit_schema_metadata(item, element_schema);
+    return build_nested_children_from_access_node(item, item->type, node.children.at("*"),
+                                                  path + ".*", element_schema);
+}
+
+} // namespace
+
+Status AccessPathParser::build_nested_children(format::ColumnDefinition* column,
+                                               const std::vector<TColumnAccessPath>& access_paths,
+                                               const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    // Scanner-materialized virtual columns and non-complex columns get no nested children.
+    if (is_scanner_materialized_virtual_column(column->name) ||
+        !is_complex_type(remove_nullable(column->type)->get_primitive_type())) {
+        return Status::OK();
+    }
+
+    // Build tree for AccessPathNode.
+    // For example, for access paths ["a.b", "a.c", "d"], the tree will be:
+    // root
+    // ├── a
+    // │   ├── b
+    // │   └── c
+    // └── d
+    AccessPathNode root;
+    const auto slot_name_lower = to_lower(column->name);
+    for (const auto& access_path : access_paths) {
+        // TODO: Support META access paths if needed. Currently AccessPathParser only supports
+        // DATA access paths.
+        if (access_path.type != TAccessPathType::DATA || !access_path.__isset.data_access_path) {
+            return Status::NotSupported(
+                    "AccessPathParser only supports DATA access paths for slot {}", column->name);
+        }
+        const auto& tokens = access_path.data_access_path.path;
+        if (tokens.empty()) { // an empty path projects the whole slot
+            insert_access_path(&root, tokens, 0);
+            continue;
+        }
+        // The leading token must be the slot's name or its identifier field id.
+        int32_t top_level_id = -1;
+        if (to_lower(tokens.front()) != slot_name_lower &&
+            !(parse_non_negative_int(tokens.front(), &top_level_id) &&
+              column->has_identifier_field_id() &&
+              top_level_id == column->get_identifier_field_id())) {
+            return Status::NotSupported("AccessPathParser access path {} does not match slot {}",
+                                        access_path_to_string(tokens), column->name);
+        }
+        insert_access_path(&root, tokens, 1);
+    }
+    return build_nested_children_from_access_node(column, column->type, root, column->name,
+                                                  schema_column);
+}
+
+Status AccessPathParser::build_nested_children(format::ColumnDefinition* column,
+                                               const SlotDescriptor* slot_desc,
+                                               const format::ColumnDefinition* schema_column) {
+    DORIS_CHECK(column != nullptr);
+    DORIS_CHECK(slot_desc != nullptr); // its access paths are read on the next line
+    return build_nested_children(column, slot_desc->all_access_paths(), schema_column);
+}
+
+} // namespace doris
diff --git a/be/src/exec/scan/access_path_parser.h b/be/src/exec/scan/access_path_parser.h
new file mode 100644
index 00000000000000..1aa4c5b89d492a
--- /dev/null
+++ b/be/src/exec/scan/access_path_parser.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris {
+
+class SlotDescriptor;
+
+class AccessPathParser {
+public:
+    static Status build_nested_children(format::ColumnDefinition* column,
+                                        const SlotDescriptor* slot_desc,
+                                        const format::ColumnDefinition* schema_column);
+    // Overload taking the access paths directly instead of reading them from a slot descriptor.
+    static Status build_nested_children(format::ColumnDefinition* column,
+                                        const std::vector<TColumnAccessPath>& access_paths,
+                                        const format::ColumnDefinition* schema_column);
+};
+
+} // namespace doris
diff --git a/be/src/exec/scan/file_scanner.cpp b/be/src/exec/scan/file_scanner.cpp
index 6419ce4f65c5e2..6811efcdd5da6e 100644
--- a/be/src/exec/scan/file_scanner.cpp
+++ b/be/src/exec/scan/file_scanner.cpp
@@ -1078,8 +1078,31 @@ Status FileScanner::_get_next_reader() {
                 _cur_reader = std::move(mc_reader);
             } else if (range.__isset.table_format_params &&
                        range.table_format_params.table_format_type == "paimon") {
-                if (_state->query_options().__isset.enable_paimon_cpp_reader &&
-                    _state->query_options().enable_paimon_cpp_reader) {
+                const auto& paimon_params = range.table_format_params.paimon_params;
+                bool use_paimon_cpp_reader = false;
+                if (paimon_params.__isset.reader_type) {
+                    switch (paimon_params.reader_type) {
+                    case TPaimonReaderType::PAIMON_CPP:
+                        use_paimon_cpp_reader = true;
+                        break;
+                    case TPaimonReaderType::PAIMON_JNI:
+                        break;
+                    case TPaimonReaderType::PAIMON_NATIVE:
+                        return Status::InternalError(
+                                "invalid PAIMON_NATIVE reader_type for paimon FORMAT_JNI split, "
+                                "possibly caused by FE/BE protocol mismatch");
+                    default:
+                        return Status::InternalError(
+                                "unknown paimon reader_type for paimon FORMAT_JNI split, possibly "
+                                "caused by FE/BE protocol mismatch");
+                    }
+                } else {
+                    // TODO: Remove this fallback after all FE versions set TPaimonReaderType.
+                    use_paimon_cpp_reader =
+                            _state->query_options().__isset.enable_paimon_cpp_reader &&
+                            _state->query_options().enable_paimon_cpp_reader;
+                }
+                if (use_paimon_cpp_reader) {
                     auto cpp_reader = PaimonCppReader::create_unique(_file_slot_descs, _state,
                                                                      _profile, range, _params);
                     if (!_is_load && !_push_down_conjuncts.empty()) {
@@ -1771,7 +1794,6 @@ Status FileScanner::_init_expr_ctxes() {
         if (is_file_slot) {
             _is_file_slot.emplace(slot_id);
             _file_slot_descs.emplace_back(it->second);
-            _file_col_names.push_back(it->second->col_name());
         }
 
         _column_descs.push_back(col_desc);
diff --git a/be/src/exec/scan/file_scanner.h b/be/src/exec/scan/file_scanner.h
index fbcbca464a5546..3675fd2449711e 100644
--- a/be/src/exec/scan/file_scanner.h
+++ b/be/src/exec/scan/file_scanner.h
@@ -135,8 +135,6 @@ class FileScanner : public Scanner {
     bool _cur_reader_eof = false;
     // File source slot descriptors
     std::vector<SlotDescriptor*> _file_slot_descs;
-    // col names from _file_slot_descs
-    std::vector<std::string> _file_col_names;
     // Unified column descriptors for init_reader (includes file, partition, missing, synthesized cols)
     std::vector<ColumnDescriptor> _column_descs;
 
@@ -149,6 +147,7 @@ class FileScanner : public Scanner {
     // dest slot name to index in _dest_vexpr_ctx;
     std::unordered_map<std::string, int> _dest_slot_name_to_idx;
     // col name to default value expr
+    // TODO: only used by json reader. Could we delete this?
     std::unordered_map<std::string, VExprContextSPtr> _col_default_value_ctx;
     // the map values of dest slot id to src slot desc
     // if there is not key of dest slot id in dest_sid_to_src_sid_without_trans, it will be set to nullptr
@@ -195,7 +194,5 @@ class FileScanner : public Scanner {
     std::shared_ptr<io::IOContext> _io_ctx;
 
-    // Whether to fill partition columns from path, default is true.
-    bool _fill_partition_from_path = true;
     std::unordered_map<std::string, std::tuple<std::string, const SlotDescriptor*>>
             _partition_col_descs;
     std::unordered_map<std::string, bool> _partition_value_is_null;
diff --git a/be/src/exec/scan/file_scanner_v2.cpp b/be/src/exec/scan/file_scanner_v2.cpp
new file mode 100644
index 00000000000000..57791cb2d85f2d
--- /dev/null
+++ b/be/src/exec/scan/file_scanner_v2.cpp
@@ -0,0 +1,835 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/scan/file_scanner_v2.h"
+
+#include <gen_cpp/Exprs_types.h>
+#include <gen_cpp/PlanNodes_types.h>
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/config.h"
+#include "common/consts.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/column/column.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "core/string_ref.h"
+#include "exec/common/util.hpp"
+#include "exec/operator/scan_operator.h"
+#include "exec/scan/access_path_parser.h"
+#include "exprs/runtime_filter_expr.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vslot_ref.h"
+#include "format/format_common.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/jni/iceberg_sys_table_reader.h"
+#include "format_v2/jni/jdbc_reader.h"
+#include "format_v2/jni/max_compute_jni_reader.h"
+#include "format_v2/jni/trino_connector_jni_reader.h"
+#include "format_v2/table/hive_reader.h"
+#include "format_v2/table/hudi_reader.h"
+#include "format_v2/table/iceberg_reader.h"
+#include "format_v2/table/paimon_reader.h"
+#include "format_v2/table/remote_doris_reader.h"
+#include "format_v2/table_reader.h"
+#include "io/fs/file_meta_cache.h"
+#include "io/io_common.h"
+#include "runtime/descriptors.h"
+#include "runtime/exec_env.h"
+#include "runtime/runtime_state.h"
+#include "service/backend_options.h"
+#include "storage/id_manager.h"
+
+namespace doris {
+namespace {
+
+std::string table_format_name(const TFileRangeDesc& range) {
+    const bool has_params = range.__isset.table_format_params;
+    return has_params ? range.table_format_params.table_format_type : std::string("NotSet");
+}
+
+TFileFormatType::type get_range_format_type(const TFileScanRangeParams& params,
+                                            const TFileRangeDesc& range) {
+    return !range.__isset.format_type ? params.format_type : range.format_type;
+}
+
+bool is_supported_table_format(const TFileRangeDesc& range) {
+    const auto format_name = table_format_name(range);
+    if (format_name == "hudi") {
+        // Hudi MOR splits (those carrying delta logs) need log-file merge semantics and must
+        // stay on the existing JNI path. FileScannerV2 currently supports native Parquet data
+        // files only.
+        const auto& params = range.table_format_params;
+        return !(range.__isset.table_format_params && params.__isset.hudi_params &&
+                 params.hudi_params.__isset.delta_logs && !params.hudi_params.delta_logs.empty());
+    }
+    return format_name == "NotSet" || format_name == "tvf" || format_name == "hive" ||
+           format_name == "iceberg" || format_name == "paimon";
+}
+
+bool is_supported_arrow_table_format(const TFileRangeDesc& range) {
+    return table_format_name(range) == "remote_doris"; // sole table format accepted for Arrow
+}
+
+bool is_supported_jni_table_format(const TFileRangeDesc& range) {
+    const auto format_name = table_format_name(range);
+    if (format_name != "paimon") {
+        return format_name == "jdbc" || format_name == "iceberg" || format_name == "hudi" ||
+               format_name == "max_compute" || format_name == "trino_connector";
+    }
+    // Paimon is accepted only when the split's reader_type is explicitly set to PAIMON_JNI.
+    const auto& paimon = range.table_format_params.paimon_params;
+    return range.__isset.table_format_params && range.table_format_params.__isset.paimon_params &&
+           paimon.__isset.reader_type && paimon.reader_type == TPaimonReaderType::PAIMON_JNI;
+}
+
+bool is_csv_format(TFileFormatType::type format_type) {
+    // Plain CSV, each compressed CSV variant, and FORMAT_PROTO are all treated as CSV here.
+    static constexpr TFileFormatType::type csv_like_formats[] = {
+            TFileFormatType::FORMAT_CSV_PLAIN,
+            TFileFormatType::FORMAT_CSV_GZ,
+            TFileFormatType::FORMAT_CSV_BZ2,
+            TFileFormatType::FORMAT_CSV_LZ4FRAME,
+            TFileFormatType::FORMAT_CSV_LZ4BLOCK,
+            TFileFormatType::FORMAT_CSV_LZOP,
+            TFileFormatType::FORMAT_CSV_DEFLATE,
+            TFileFormatType::FORMAT_CSV_SNAPPYBLOCK,
+            TFileFormatType::FORMAT_PROTO,
+    };
+    return std::find(std::begin(csv_like_formats), std::end(csv_like_formats), format_type) !=
+           std::end(csv_like_formats);
+}
+
+bool is_text_format(TFileFormatType::type format_type) {
+    return format_type == TFileFormatType::FORMAT_TEXT; // exact match on FORMAT_TEXT only
+}
+
+bool is_json_format(TFileFormatType::type format_type) {
+    return format_type == TFileFormatType::FORMAT_JSON; // exact match on FORMAT_JSON only
+}
+
+bool is_native_format(TFileFormatType::type format_type) {
+    return format_type == TFileFormatType::FORMAT_NATIVE; // exact match on FORMAT_NATIVE only
+}
+
+bool is_partition_slot(const TFileScanSlotInfo& slot_info, const std::string& column_name) {
+    const bool row_id_column = column_name.starts_with(BeConsts::GLOBAL_ROWID_COL) ||
+                               column_name == BeConsts::ICEBERG_ROWID_COL;
+    // Row-id columns never qualify; otherwise an explicit category wins over is_file_slot.
+    return !row_id_column && (slot_info.__isset.category
+                                      ? slot_info.category == TColumnCategory::PARTITION_KEY
+                                      : !slot_info.is_file_slot);
+}
+
+bool is_data_file_slot(const TFileScanSlotInfo& slot_info, const std::string& column_name) {
+    if (column_name == BeConsts::ICEBERG_ROWID_COL ||
+        column_name.starts_with(BeConsts::GLOBAL_ROWID_COL)) {
+        return false;
+    }
+    // CSV and other non-self-describing formats need FE slot descriptors for only the columns
+    // that are physically read from the file. Partition/default/virtual columns stay in
+    // TableReader's mapping layer and are materialized after the file-local block is read.
+    if (!slot_info.__isset.category) {
+        // Old FE sends no category, so `is_file_slot` is the only signal available.
+        return slot_info.is_file_slot;
+    }
+    const auto category = slot_info.category;
+    return category == TColumnCategory::REGULAR || category == TColumnCategory::GENERATED;
+}
+
+// Rewrites, in place, every slot ref reachable from *expr to address columns by global index.
+Status rewrite_slot_refs_to_global_index(
+        VExprSPtr* expr,
+        const std::unordered_map<int32_t, format::GlobalIndex>& slot_id_to_global_index) {
+    DORIS_CHECK(expr != nullptr);
+    if (*expr == nullptr) {
+        return Status::OK();
+    }
+    // Runtime filter: rewrite the expression it holds as its impl, then store it back.
+    if (auto* rf_expr = dynamic_cast<RuntimeFilterExpr*>(expr->get()); rf_expr != nullptr) {
+        auto rf_impl = rf_expr->get_impl();
+        DORIS_CHECK(rf_impl != nullptr);
+        RETURN_IF_ERROR(rewrite_slot_refs_to_global_index(&rf_impl, slot_id_to_global_index));
+        rf_expr->set_impl(std::move(rf_impl));
+        return Status::OK();
+    }
+    if (!(*expr)->is_slot_ref()) {
+        // Inner node: rewrite a copy of the child list, then install it on the node.
+        auto rewritten_children = (*expr)->children();
+        for (auto& child_expr : rewritten_children) {
+            if (child_expr != nullptr) {
+                RETURN_IF_ERROR(
+                        rewrite_slot_refs_to_global_index(&child_expr, slot_id_to_global_index));
+            }
+        }
+        (*expr)->set_children(std::move(rewritten_children));
+        return Status::OK();
+    }
+    // Leaf slot ref: swap it for one addressed by global index. A slot id that has no mapping
+    // is used as the global index itself, which is why it must be non-negative.
+    const auto* old_ref = assert_cast<const VSlotRef*>(expr->get());
+    const auto mapped = slot_id_to_global_index.find(old_ref->slot_id());
+    if (mapped == slot_id_to_global_index.end()) {
+        DORIS_CHECK(old_ref->slot_id() >= 0);
+    }
+    const auto global_index = mapped != slot_id_to_global_index.end()
+                                      ? mapped->second
+                                      : format::GlobalIndex(cast_set<size_t>(old_ref->slot_id()));
+    const auto position = cast_set<int>(global_index.value());
+    *expr = VSlotRef::create_shared(position, position, -1, old_ref->data_type(),
+                                    old_ref->column_name());
+    RETURN_IF_ERROR(expr->get()->prepare(nullptr, RowDescriptor(), nullptr));
+    return Status::OK();
+}
+
+} // namespace
+
+#ifdef BE_TEST
+Status FileScannerV2::TEST_to_file_format(TFileFormatType::type format_type,
+                                          format::FileFormat* file_format) {
+    return _to_file_format(format_type, file_format);
+}
+
+bool FileScannerV2::TEST_is_partition_slot(const TFileScanSlotInfo& slot_info,
+                                           const std::string& column_name) {
+    return is_partition_slot(slot_info, column_name);
+}
+
+bool FileScannerV2::TEST_is_data_file_slot(const TFileScanSlotInfo& slot_info,
+                                           const std::string& column_name) {
+    return is_data_file_slot(slot_info, column_name); // BE_TEST-only forwarder
+}
+
+Status FileScannerV2::TEST_rewrite_slot_refs_to_global_index(
+        VExprSPtr* expr,
+        const std::unordered_map<int32_t, format::GlobalIndex>& slot_id_to_global_index) {
+    return rewrite_slot_refs_to_global_index(expr, slot_id_to_global_index); // test-only hook
+}
+#endif
+
+bool FileScannerV2::is_supported(const TFileScanRangeParams& params, const TFileRangeDesc& range) {
+    // Dispatch on the format resolved for this range; each format family has its own gate.
+    const auto fmt = get_range_format_type(params, range);
+    if (fmt == TFileFormatType::FORMAT_ARROW) {
+        return is_supported_arrow_table_format(range);
+    }
+    if (fmt == TFileFormatType::FORMAT_JNI) {
+        return is_supported_jni_table_format(range);
+    }
+    if (fmt == TFileFormatType::FORMAT_PARQUET || is_csv_format(fmt) || is_text_format(fmt) ||
+        is_json_format(fmt) || is_native_format(fmt)) {
+        return is_supported_table_format(range);
+    }
+    LOG(WARNING) << "Unsupported file format type " << fmt << " for file scanner v2";
+    return false;
+}
+
+FileScannerV2::FileScannerV2(RuntimeState* state, FileScanLocalState* local_state, int64_t limit,
+                             std::shared_ptr<SplitSourceConnector> split_source,
+                             RuntimeProfile* profile, ShardedKVCache* kv_cache,
+                             const std::unordered_map<std::string, int>* colname_to_slot_id)
+        : Scanner(state, local_state, limit, profile),
+          _split_source(std::move(split_source)),
+          _kv_cache(kv_cache) {
+    (void)colname_to_slot_id; // intentionally unused in this constructor
+    if (state->get_query_ctx() != nullptr &&
+        state->get_query_ctx()->file_scan_range_params_map.count(local_state->parent_id()) > 0) {
+        _params = &(state->get_query_ctx()->file_scan_range_params_map[local_state->parent_id()]);
+    } else {
+        _params = _split_source->get_params(); // no query-ctx entry for this node id
+    }
+}
+
+Status FileScannerV2::init(RuntimeState* state, const VExprContextSPtrs& conjuncts) {
+    RETURN_IF_ERROR(Scanner::init(state, conjuncts)); // base init, then counters and IO context
+    _get_block_timer =
+            ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileScannerV2GetBlockTime", 1);
+    _file_counter =
+            ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(), "FileNumber", TUnit::UNIT, 1);
+    _file_read_bytes_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(),
+                                                      "FileReadBytes", TUnit::BYTES, 1);
+    _file_read_calls_counter = ADD_COUNTER_WITH_LEVEL(_local_state->scanner_profile(),
+                                                      "FileReadCalls", TUnit::UNIT, 1);
+    _file_read_time_counter =
+            ADD_TIMER_WITH_LEVEL(_local_state->scanner_profile(), "FileReadTime", 1);
+    _adaptive_batch_predicted_rows_counter = ADD_COUNTER_WITH_LEVEL(
+            _local_state->scanner_profile(), "AdaptiveBatchPredictedRows", TUnit::UNIT, 1);
+    _adaptive_batch_actual_bytes_counter = ADD_COUNTER_WITH_LEVEL(
+            _local_state->scanner_profile(), "AdaptiveBatchActualBytes", TUnit::BYTES, 1);
+    _adaptive_batch_probe_count_counter = ADD_COUNTER_WITH_LEVEL(
+            _local_state->scanner_profile(), "AdaptiveBatchProbeCount", TUnit::UNIT, 1);
+    _file_cache_statistics = std::make_unique<io::FileCacheStatistics>();
+    _file_reader_stats = std::make_unique<io::FileReaderStats>();
+    RETURN_IF_ERROR(_init_io_ctx()); // allocates _io_ctx; the stats objects are attached below
+    _io_ctx->file_cache_stats = _file_cache_statistics.get();
+    _io_ctx->file_reader_stats = _file_reader_stats.get();
+    _io_ctx->is_disposable = _state->query_options().disable_file_cache;
+    return Status::OK();
+}
+
+Status FileScannerV2::_open_impl(RuntimeState* state) {
+    RETURN_IF_CANCELLED(state);
+    RETURN_IF_ERROR(Scanner::_open_impl(state));
+    RETURN_IF_ERROR(_split_source->get_next(&_first_scan_range, &_current_range));
+    if (_first_scan_range) { // a first range exists: create and init the reader from it
+        RETURN_IF_ERROR(_create_table_reader_for_format(_current_range, &_table_reader));
+        DORIS_CHECK(_table_reader != nullptr);
+        RETURN_IF_ERROR(_init_expr_ctxes());
+        RETURN_IF_ERROR(_init_table_reader(_current_range));
+    }
+    return Status::OK(); // without a first range, _table_reader stays unset
+}
+
+Status FileScannerV2::_get_block_impl(RuntimeState* state, Block* block, bool* eof) {
+    while (true) { // keep going across splits until a read returns without EOF
+        RETURN_IF_CANCELLED(state);
+        if (!_has_prepared_split) {
+            RETURN_IF_ERROR(_prepare_next_split(eof));
+            if (*eof) {
+                return Status::OK();
+            }
+        }
+        // Read from the current split; the timer covers batch-size prediction and the read.
+        {
+            SCOPED_TIMER(_get_block_timer);
+            if (_should_run_adaptive_batch_size()) {
+                _table_reader->set_batch_size(_predict_reader_batch_rows());
+            }
+            RETURN_IF_ERROR(_table_reader->get_block(block, eof));
+        }
+        if (*eof) {
+            _state->update_num_finished_scan_range(1);
+            _has_prepared_split = false;
+            *eof = false; // reader EOF ends only this split; try the next one
+            continue;
+        }
+        _update_adaptive_batch_size(*block);
+        return Status::OK();
+    }
+}
+
+Status FileScannerV2::_prepare_next_split(bool* eos) {
+    bool has_next = _first_scan_range; // the first range was already fetched by _open_impl()
+    if (!_first_scan_range) {
+        RETURN_IF_ERROR(_split_source->get_next(&has_next, &_current_range));
+    }
+    _first_scan_range = false;
+    if (!has_next || _should_stop) {
+        *eos = true;
+        return Status::OK();
+    }
+    DORIS_CHECK(_table_reader != nullptr); // the reader from _open_impl() is reused per split
+    _current_range_path = _current_range.path;
+    _init_adaptive_batch_size_state(get_range_format_type(*_params, _current_range));
+    RETURN_IF_ERROR(_prepare_table_reader_split(_current_range));
+    COUNTER_UPDATE(_file_counter, 1);
+    _has_prepared_split = true;
+    *eos = false;
+    return Status::OK();
+}
+
+Status FileScannerV2::_init_table_reader(const TFileRangeDesc& range) {
+    const auto format_type = get_range_format_type(*_params, range);
+    format::FileFormat file_format;
+    RETURN_IF_ERROR(_to_file_format(format_type, &file_format));
+    DORIS_CHECK(_table_reader != nullptr);
+    // Column predicates and conjuncts are handed to the reader in global-index form.
+    format::TableColumnPredicates table_column_predicates;
+    RETURN_IF_ERROR(_build_table_column_predicates(&table_column_predicates));
+    VExprContextSPtrs table_conjuncts;
+    RETURN_IF_ERROR(_build_table_conjuncts(&table_conjuncts));
+    RETURN_IF_ERROR(_table_reader->init({
+            .projected_columns = _projected_columns,
+            .column_predicates = std::move(table_column_predicates),
+            .conjuncts = std::move(table_conjuncts),
+            .format = file_format,
+            .scan_params = const_cast<TFileScanRangeParams*>(_params),
+            .io_ctx = _io_ctx,
+            .runtime_state = _state,
+            .scanner_profile = _local_state->scanner_profile(),
+            .file_slot_descs = &_file_slot_descs,
+            .push_down_agg_type = _local_state->get_push_down_agg_type(),
+            .condition_cache_digest = _local_state->get_condition_cache_digest(),
+    }));
+    return Status::OK();
+}
+
+Status FileScannerV2::_create_table_reader_for_format(
+        const TFileRangeDesc& range, std::unique_ptr<format::TableReader>* reader) const {
+    DORIS_CHECK(reader != nullptr);
+    const auto table_format = table_format_name(range); // selects the concrete reader class
+    if (table_format == "NotSet" || table_format == "tvf") {
+        *reader = std::make_unique<format::TableReader>();
+    } else if (table_format == "hive") {
+        *reader = format::hive::HiveReader::create_unique();
+    } else if (table_format == "iceberg") {
+        if (get_range_format_type(*_params, range) == TFileFormatType::FORMAT_JNI) {
+            *reader = std::make_unique<format::iceberg::IcebergSysTableJniReader>();
+        } else { // iceberg ranges that are not FORMAT_JNI
+            *reader = std::make_unique<format::iceberg::IcebergTableReader>();
+        }
+    } else if (table_format == "paimon") {
+        *reader = std::make_unique<format::paimon::PaimonHybridReader>();
+    } else if (table_format == "hudi") {
+        *reader = std::make_unique<format::hudi::HudiHybridReader>();
+    } else if (table_format == "jdbc") {
+        *reader = std::make_unique<format::jdbc::JdbcJniReader>();
+    } else if (table_format == "max_compute") {
+        const auto* mc_desc = // NOTE(review): unchecked downcast of table_desc(); confirm
+                static_cast<const MaxComputeTableDescriptor*>(_output_tuple_desc->table_desc());
+        RETURN_IF_ERROR(mc_desc->init_status()); // report a failed descriptor init before use
+        *reader = std::make_unique<format::max_compute::MaxComputeJniReader>(mc_desc);
+    } else if (table_format == "trino_connector") {
+        *reader = std::make_unique<format::trino_connector::TrinoConnectorJniReader>();
+    } else if (table_format == "remote_doris") {
+        *reader = std::make_unique<format::remote_doris::RemoteDorisReader>();
+    } else {
+        return Status::NotSupported("FileScannerV2 does not support table format {}", table_format);
+    }
+    return Status::OK();
+}
+
+Status FileScannerV2::_prepare_table_reader_split(const TFileRangeDesc& range) {
+    std::map<std::string, Field> partition_values; // filled from the range's path columns
+    RETURN_IF_ERROR(_generate_partition_values(range, &partition_values));
+    format::FileFormat current_split_format; // resolved per split, not only at reader init
+    RETURN_IF_ERROR(_to_file_format(get_range_format_type(*_params, range), &current_split_format));
+    RETURN_IF_ERROR(_table_reader->prepare_split({
+            .partition_values = std::move(partition_values),
+            .cache = _kv_cache,
+            .current_range = range,
+            .current_split_format = current_split_format,
+            .global_rowid_context = _create_global_rowid_context(range),
+    }));
+    return Status::OK();
+}
+
+bool FileScannerV2::_should_enable_file_meta_cache() const { // cache on and ranges < limit / 3
+    return ExecEnv::GetInstance()->file_meta_cache()->enabled() &&
+           _split_source->num_scan_ranges() < config::max_external_file_meta_cache_num / 3;
+}
+
+std::optional<format::GlobalRowIdContext> FileScannerV2::_create_global_rowid_context(
+        const TFileRangeDesc& range) const {
+    if (!_need_global_rowid_column) { // no projected column asked for a global row id
+        return std::nullopt;
+    }
+    auto& id_file_map = _state->get_id_file_map();
+    DORIS_CHECK(id_file_map != nullptr);
+    const auto file_id = id_file_map->get_file_mapping_id( // id for this (node, range) mapping
+            std::make_shared<FileMapping>(_local_state->cast<FileScanLocalState>().parent_id(),
+                                          range, _should_enable_file_meta_cache()));
+    return format::GlobalRowIdContext {
+            .version = IdManager::ID_VERSION,
+            .backend_id = BackendOptions::get_backend_id(),
+            .file_id = file_id,
+    };
+}
+
+Status FileScannerV2::_generate_partition_values(
+        const TFileRangeDesc& range, std::map<std::string, Field>* partition_values) const {
+    DORIS_CHECK(partition_values != nullptr);
+    partition_values->clear(); // the output always starts empty
+    if (!range.__isset.columns_from_path_keys || !range.__isset.columns_from_path) {
+        return Status::OK();
+    }
+    DORIS_CHECK(range.columns_from_path_keys.size() == range.columns_from_path.size());
+    for (size_t idx = 0; idx < range.columns_from_path_keys.size(); ++idx) {
+        const auto& key = range.columns_from_path_keys[idx];
+        const auto it = _partition_slot_descs.find(key);
+        if (it == _partition_slot_descs.end()) { // path key is not a projected partition slot
+            continue;
+        }
+        const auto& value = range.columns_from_path[idx];
+        const bool is_null = range.__isset.columns_from_path_is_null && // absent flag: not null
+                             idx < range.columns_from_path_is_null.size() &&
+                             range.columns_from_path_is_null[idx];
+        Field field; // stored under the canonical column name even if the key is an alias
+        DORIS_CHECK(it->second.slot_desc != nullptr);
+        RETURN_IF_ERROR(_parse_partition_value(it->second.slot_desc, value, is_null, &field));
+        partition_values->emplace(it->second.canonical_name, std::move(field));
+    }
+    return Status::OK();
+}
+
+Status FileScannerV2::_parse_partition_value(const SlotDescriptor* slot_desc,
+                                             const std::string& value, bool is_null,
+                                             Field* field) const {
+    DORIS_CHECK(slot_desc != nullptr);
+    DORIS_CHECK(field != nullptr);
+    if (is_null) { // null partition value: emit a TYPE_NULL field without parsing the text
+        *field = Field::create_field<TYPE_NULL>(Null());
+        return Status::OK();
+    }
+    const auto data_type = remove_nullable(slot_desc->get_data_type_ptr()); // parse inner type
+    auto column = data_type->create_column(); // scratch column that receives the parsed value
+    auto serde = data_type->get_serde();
+    DataTypeSerDe::FormatOptions options;
+    options.converted_from_string = true;
+    StringRef ref(value.data(), value.size());
+    RETURN_IF_ERROR(serde->from_string(ref, *column, options));
+    DORIS_CHECK(column->size() == 1); // exactly one value is expected after parsing
+    *field = (*column)[0];
+    return Status::OK();
+}
+
+Status FileScannerV2::_init_expr_ctxes() { // rebuilds slot lookups and projected columns
+    _slot_id_to_desc.clear();
+    _slot_id_to_global_index.clear();
+    _partition_slot_descs.clear();
+    _file_slot_descs.clear();
+    for (const auto* slot_desc : _output_tuple_desc->slots()) {
+        _slot_id_to_desc.emplace(slot_desc->id(), slot_desc);
+    }
+    DORIS_CHECK(_table_reader != nullptr); // the reader annotates columns in the next step
+    RETURN_IF_ERROR(_build_projected_columns(*_table_reader));
+    return Status::OK();
+}
+
+Status FileScannerV2::_build_projected_columns(const format::TableReader& table_reader) {
+    _projected_columns.clear();
+    _projected_columns.reserve(_params->required_slots.size());
+    _need_global_rowid_column = false;
+    format::ProjectedColumnBuildContext build_context {
+            .scan_params = _params,
+            .range = &_current_range,
+            .runtime_state = _state,
+    };
+    // Slot order in required_slots defines each projected column's GlobalIndex.
+    for (size_t slot_idx = 0; slot_idx < _params->required_slots.size(); ++slot_idx) {
+        const auto& slot_info = _params->required_slots[slot_idx];
+        const auto it = _slot_id_to_desc.find(slot_info.slot_id);
+        if (it == _slot_id_to_desc.end()) {
+            return Status::InternalError("Unknown source slot descriptor, slot_id={}",
+                                         slot_info.slot_id);
+        }
+        auto column = _build_table_column(it->second);
+        if (column.name.starts_with(BeConsts::GLOBAL_ROWID_COL)) {
+            _need_global_rowid_column = true;
+        }
+        RETURN_IF_ERROR(_build_default_expr(slot_info, &column.default_expr));
+        build_context.schema_column.reset(); // per-slot state; read back after annotate
+        RETURN_IF_ERROR(table_reader.annotate_projected_column(slot_info, &build_context, &column));
+        // Build nested children from access paths generated by the slot's access-path
+        // expressions. A projected column can therefore contain only a subset of the schema
+        // column's nested children.
+        RETURN_IF_ERROR(AccessPathParser::build_nested_children(
+                &column, it->second,
+                build_context.schema_column.has_value() ? &*build_context.schema_column : nullptr));
+        if (is_partition_slot(slot_info, column.name)) {
+            column.is_partition_key = true;
+            _partition_slot_descs.emplace( // registered under the column name ...
+                    column.name,
+                    PartitionSlotInfo {.slot_desc = it->second, .canonical_name = column.name});
+            for (const auto& alias : column.name_mapping) { // ... and under every alias
+                _partition_slot_descs.emplace(
+                        alias,
+                        PartitionSlotInfo {.slot_desc = it->second, .canonical_name = column.name});
+            }
+        } else if (is_data_file_slot(slot_info, column.name)) {
+            _file_slot_descs.push_back(const_cast<SlotDescriptor*>(it->second));
+        }
+        const auto global_index = format::GlobalIndex(slot_idx); // position in required_slots
+        _slot_id_to_global_index.emplace(slot_info.slot_id, global_index);
+        _projected_columns.push_back(std::move(column));
+    }
+    RETURN_IF_ERROR(table_reader.validate_projected_columns(build_context));
+    return Status::OK();
+}
+
+Status FileScannerV2::_build_default_expr(const TFileScanSlotInfo& slot_info,
+                                          VExprContextSPtr* ctx) const {
+    DORIS_CHECK(ctx != nullptr);
+    if (slot_info.__isset.default_value_expr && !slot_info.default_value_expr.nodes.empty()) {
+        return VExpr::create_expr_tree(slot_info.default_value_expr, *ctx);
+    }
+    // Otherwise fall back to the scan-level default keyed by slot id, when one is present.
+    if (_params->__isset.default_value_of_src_slot) {
+        const auto it = _params->default_value_of_src_slot.find(slot_info.slot_id);
+        if (it != _params->default_value_of_src_slot.end() && !it->second.nodes.empty()) {
+            return VExpr::create_expr_tree(it->second, *ctx);
+        }
+    }
+    return Status::OK(); // no default expression found: *ctx is left untouched
+}
+
+format::ColumnDefinition FileScannerV2::_build_table_column(const SlotDescriptor* slot_desc) {
+    DORIS_CHECK(slot_desc != nullptr);
+    format::ColumnDefinition column;
+    // TODO(gabriel): why always BY_NAME here?
+    column.identifier = Field::create_field<TYPE_STRING>(slot_desc->col_name());
+    column.name = slot_desc->col_name();
+    column.type = slot_desc->get_data_type_ptr();
+    return column; // only identifier, name and type are set here
+}
+
+Status FileScannerV2::_build_table_column_predicates(
+        format::TableColumnPredicates* predicates) const {
+    DORIS_CHECK(predicates != nullptr);
+    predicates->clear();
+    // Re-key the operator's per-slot predicates by projected-column global index. Slots with no
+    // output descriptor, or with no global index, are skipped.
+    const auto& per_slot = _local_state->cast<FileScanLocalState>()._slot_id_to_predicates;
+    for (const auto& [slot_id, predicate_list] : per_slot) {
+        if (!_slot_id_to_desc.contains(slot_id)) {
+            continue;
+        }
+        if (const auto index_it = _slot_id_to_global_index.find(slot_id);
+            index_it != _slot_id_to_global_index.end()) {
+            (*predicates)[index_it->second] = predicate_list;
+        }
+    }
+    return Status::OK();
+}
+
+Status FileScannerV2::_build_table_conjuncts(VExprContextSPtrs* conjuncts) const {
+    DORIS_CHECK(conjuncts != nullptr);
+    conjuncts->clear();
+    conjuncts->reserve(_conjuncts.size());
+    for (const auto& conjunct : _conjuncts) {
+        VExprSPtr root; // receives a clone; the slot rewrite below is applied to the clone
+        RETURN_IF_ERROR(format::clone_table_expr_tree(conjunct->root(), &root));
+        RETURN_IF_ERROR(rewrite_slot_refs_to_global_index(&root, _slot_id_to_global_index));
+        conjuncts->push_back(VExprContext::create_shared(std::move(root)));
+    }
+    return Status::OK();
+}
+
+TFileFormatType::type FileScannerV2::_get_current_format_type() const { // for _current_range
+    return get_range_format_type(*_params, _current_range);
+}
+
+Status FileScannerV2::_to_file_format(TFileFormatType::type format_type,
+                                      format::FileFormat* file_format) {
+    DORIS_CHECK(file_format != nullptr);
+    switch (format_type) { // maps the thrift format type onto the reader-side FileFormat
+    case TFileFormatType::FORMAT_PARQUET:
+        *file_format = format::FileFormat::PARQUET;
+        return Status::OK();
+    case TFileFormatType::FORMAT_JNI:
+        *file_format = format::FileFormat::JNI;
+        return Status::OK();
+    case TFileFormatType::FORMAT_CSV_PLAIN:
+    case TFileFormatType::FORMAT_CSV_GZ:
+    case TFileFormatType::FORMAT_CSV_BZ2:
+    case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+    case TFileFormatType::FORMAT_CSV_LZOP:
+    case TFileFormatType::FORMAT_CSV_DEFLATE:
+    case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
+    case TFileFormatType::FORMAT_PROTO: // grouped with the CSV variants
+        *file_format = format::FileFormat::CSV;
+        return Status::OK();
+    case TFileFormatType::FORMAT_TEXT:
+        *file_format = format::FileFormat::TEXT;
+        return Status::OK();
+    case TFileFormatType::FORMAT_JSON:
+        *file_format = format::FileFormat::JSON;
+        return Status::OK();
+    case TFileFormatType::FORMAT_NATIVE:
+        *file_format = format::FileFormat::NATIVE;
+        return Status::OK();
+    case TFileFormatType::FORMAT_ARROW:
+        *file_format = format::FileFormat::ARROW;
+        return Status::OK();
+    default: // every other format type is rejected; *file_format is not written
+        return Status::NotSupported("FileScannerV2 does not support file format {}",
+                                    to_string(format_type));
+    }
+}
+
+Status FileScannerV2::_init_io_ctx() {
+    _io_ctx = std::make_shared<io::IOContext>();
+    _io_ctx->query_id = &_state->query_id(); // stores a pointer to the id, not a copy
+    return Status::OK();
+}
+
+void FileScannerV2::_reset_adaptive_batch_size_state() {
+    _block_size_predictor.reset(); // destroys the predictor and whatever history it holds
+    COUNTER_SET(_adaptive_batch_predicted_rows_counter, int64_t(0));
+    COUNTER_SET(_adaptive_batch_actual_bytes_counter, int64_t(0));
+}
+
+void FileScannerV2::_init_adaptive_batch_size_state(TFileFormatType::type format_type) {
+    _reset_adaptive_batch_size_state();
+    if (!_should_enable_adaptive_batch_size(format_type)) {
+        return; // predictor stays null, so adaptive sizing is off for this split
+    }
+
+    // V2 native file readers do not have reliable row-width hints before the first batch. Start
+    // every split with a small probe, then learn bytes-per-row from the materialized table block
+    // and keep later batches close to RuntimeState::preferred_block_size_bytes().
+    _block_size_predictor = std::make_unique<AdaptiveBlockSizePredictor>(
+            _state->preferred_block_size_bytes(), 0.0, ADAPTIVE_BATCH_INITIAL_PROBE_ROWS,
+            _state->batch_size());
+}
+
+bool FileScannerV2::_should_enable_adaptive_batch_size(TFileFormatType::type format_type) const {
+    if (!config::enable_adaptive_batch_size) { // config switch overrides the per-format list
+        return false;
+    }
+    switch (format_type) {
+    case TFileFormatType::FORMAT_PARQUET:
+    case TFileFormatType::FORMAT_ORC: // NOTE(review): _to_file_format() has no ORC case
+    case TFileFormatType::FORMAT_CSV_PLAIN:
+    case TFileFormatType::FORMAT_CSV_GZ:
+    case TFileFormatType::FORMAT_CSV_BZ2:
+    case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+    case TFileFormatType::FORMAT_CSV_LZOP:
+    case TFileFormatType::FORMAT_CSV_DEFLATE:
+    case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
+    case TFileFormatType::FORMAT_PROTO:
+    case TFileFormatType::FORMAT_TEXT:
+    case TFileFormatType::FORMAT_JSON:
+    case TFileFormatType::FORMAT_JNI:
+        return true;
+    default: // anything not listed above, e.g. FORMAT_NATIVE and FORMAT_ARROW
+        return false;
+    }
+}
+
+bool FileScannerV2::_should_run_adaptive_batch_size() const { // needs a live predictor
+    // COUNT pushdown emits synthetic rows from file metadata and does not materialize file columns,
+    // so there is no useful row-width sample to learn from.
+    return _block_size_predictor != nullptr &&
+           _local_state->get_push_down_agg_type() != TPushAggOp::type::COUNT;
+}
+
+size_t FileScannerV2::_predict_reader_batch_rows() {
+    DORIS_CHECK(_block_size_predictor != nullptr);
+    // Before history exists this returns the probe row count; after update(), it returns roughly
+    // preferred_block_size_bytes / EWMA(bytes_per_row), capped by RuntimeState::batch_size().
+    const size_t predicted_rows = _block_size_predictor->predict_next_rows();
+    COUNTER_SET(_adaptive_batch_predicted_rows_counter, static_cast<int64_t>(predicted_rows));
+    return predicted_rows; // the same value was just published to the profile counter
+}
+
+void FileScannerV2::_update_adaptive_batch_size(const Block& block) {
+    if (!_should_run_adaptive_batch_size()) {
+        return;
+    }
+    COUNTER_SET(_adaptive_batch_actual_bytes_counter, static_cast<int64_t>(block.bytes()));
+    if (block.rows() == 0) { // an empty block is not fed to the predictor
+        return;
+    }
+    // The sample is taken after TableReader has finalized file-local columns to table columns.
+    // This matches the memory shape seen by upstream operators and catches very wide nested
+    // columns, such as map/string payloads, after the first probe batch.
+    if (!_block_size_predictor->has_history()) {
+        COUNTER_UPDATE(_adaptive_batch_probe_count_counter, 1); // counted before update()
+    }
+    _block_size_predictor->update(block);
+}
+
+Status FileScannerV2::close(RuntimeState* state) {
+    if (!_try_close()) { // another call already took ownership of closing this scanner
+        return Status::OK();
+    }
+    // _try_close() gates re-entry, so a failed reader close must not skip the remaining teardown.
+    Status reader_status = _table_reader != nullptr ? _table_reader->close() : Status::OK();
+    _report_condition_cache_profile(); // delta-based, so harmless when nothing new was recorded
+    _table_reader.reset();
+    const Status base_status = Scanner::close(state);
+    return reader_status.ok() ? base_status : reader_status; // the reader's error is reported first
+}
+
+void FileScannerV2::try_stop() {
+    Scanner::try_stop();
+    if (_io_ctx) { // null until init() has created the IO context
+        _io_ctx->should_stop = true; // the same IOContext is handed to the table reader
+    }
+}
+
+void FileScannerV2::update_realtime_counters() {
+    if (_file_reader_stats == nullptr) { // stats are allocated in init()
+        return;
+    }
+    const int64_t bytes_read = _file_reader_stats->read_bytes; // converted implicitly
+    COUNTER_SET(_file_read_bytes_counter, bytes_read);
+    COUNTER_SET(_file_read_calls_counter, cast_set<int64_t>(_file_reader_stats->read_calls));
+    COUNTER_SET(_file_read_time_counter, cast_set<int64_t>(_file_reader_stats->read_time_ns));
+}
+
+void FileScannerV2::_collect_profile_before_close() {
+    _report_file_reader_predicate_filtered_rows(); // fold reader-side filtered rows in first
+    Scanner::_collect_profile_before_close();
+    if (_file_reader_stats != nullptr) { // final snapshot of the file read counters
+        COUNTER_SET(_file_read_bytes_counter, cast_set<int64_t>(_file_reader_stats->read_bytes));
+        COUNTER_SET(_file_read_calls_counter, cast_set<int64_t>(_file_reader_stats->read_calls));
+        COUNTER_SET(_file_read_time_counter, cast_set<int64_t>(_file_reader_stats->read_time_ns));
+    }
+    // Query profiles can be collected before Scanner::close() runs. Publish condition-cache
+    // counters here as well, using deltas so this method and close() cannot double count.
+    _report_condition_cache_profile();
+}
+
+bool FileScannerV2::_should_update_load_counters() const {
+    if (_is_load) {
+        return true;
+    }
+    // TVF based loads (e.g. http_stream, group commit relay) plan the load source as a
+    // tvf query scan without src tuple desc, so _is_load is false. But rows filtered by
+    // the load's WHERE clause still need to be reported as unselected rows. FILE_STREAM
+    // is only reachable from such load entries, never from normal queries, so use it to
+    // identify these scanners.
+    return (_params != nullptr && _params->__isset.file_type && // scan-level file type
+            _params->file_type == TFileType::FILE_STREAM) ||
+           (_current_range.__isset.file_type && _current_range.file_type == TFileType::FILE_STREAM);
+}
+
+void FileScannerV2::_report_file_reader_predicate_filtered_rows() {
+    const int64_t filtered_rows = _io_ctx != nullptr ? _io_ctx->predicate_filtered_rows : 0;
+    const int64_t filtered_delta = filtered_rows - _reported_predicate_filtered_rows;
+    if (filtered_delta > 0) { // only the part not yet reported is added
+        // File readers can evaluate localized conjuncts before a block reaches Scanner. Count
+        // those rows as scanner-level unselected rows so load statistics stay identical no matter
+        // whether a predicate is pushed down or evaluated by Scanner::_filter_output_block().
+        _counter.num_rows_unselected += filtered_delta;
+        _reported_predicate_filtered_rows = filtered_rows; // remember what was already added
+    }
+}
+
+void FileScannerV2::_report_condition_cache_profile() { // delta-based; repeat calls are safe
+    auto* local_state = static_cast<FileScanLocalState*>(_local_state);
+    const int64_t hit_count =
+            _table_reader != nullptr ? _table_reader->condition_cache_hit_count() : 0;
+    const int64_t hit_delta = hit_count - _reported_condition_cache_hit_count;
+    if (hit_delta > 0) { // publish only hits that were not reported before
+        COUNTER_UPDATE(local_state->_condition_cache_hit_counter, hit_delta);
+        _reported_condition_cache_hit_count = hit_count;
+    }
+    const int64_t filtered_rows = _io_ctx != nullptr ? _io_ctx->condition_cache_filtered_rows : 0;
+    const int64_t filtered_delta = filtered_rows - _reported_condition_cache_filtered_rows;
+    if (filtered_delta > 0) { // same delta scheme for rows filtered via the condition cache
+        COUNTER_UPDATE(local_state->_condition_cache_filtered_rows_counter, filtered_delta);
+        _reported_condition_cache_filtered_rows = filtered_rows;
+    }
+}
+
+} // namespace doris
diff --git a/be/src/exec/scan/file_scanner_v2.h b/be/src/exec/scan/file_scanner_v2.h
new file mode 100644
index 00000000000000..bc493bfbd85c59
--- /dev/null
+++ b/be/src/exec/scan/file_scanner_v2.h
@@ -0,0 +1,162 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "common/factory_creator.h"
+#include "common/status.h"
+#include "core/block/block.h"
+#include "exec/operator/file_scan_operator.h"
+#include "exec/scan/scanner.h"
+#include "exec/scan/split_source_connector.h"
+#include "exprs/vexpr_fwd.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Descriptors_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+#include "storage/segment/adaptive_block_size_predictor.h"
+
+namespace doris {
+
+class RuntimeState;
+class SlotDescriptor;
+class TFileRangeDesc;
+class TFileScanRangeParams;
+class ShardedKVCache;
+
+class FileScannerV2 final : public Scanner { // Scanner that drives a format::TableReader
+    ENABLE_FACTORY_CREATOR(FileScannerV2);
+
+public:
+    static constexpr const char* NAME = "FileScannerV2";
+    static constexpr size_t ADAPTIVE_BATCH_INITIAL_PROBE_ROWS = 32; // first-batch probe size
+
+    static bool is_supported(const TFileScanRangeParams& params, const TFileRangeDesc& range);
+#ifdef BE_TEST
+    static Status TEST_to_file_format(TFileFormatType::type format_type,
+                                      format::FileFormat* file_format);
+    static bool TEST_is_partition_slot(const TFileScanSlotInfo& slot_info,
+                                       const std::string& column_name);
+    static bool TEST_is_data_file_slot(const TFileScanSlotInfo& slot_info,
+                                       const std::string& column_name);
+    static Status TEST_rewrite_slot_refs_to_global_index(
+            VExprSPtr* expr,
+            const std::unordered_map<int32_t, format::GlobalIndex>& slot_id_to_global_index);
+#endif
+
+    FileScannerV2(RuntimeState* state, FileScanLocalState* parent, int64_t limit,
+                  std::shared_ptr<SplitSourceConnector> split_source, RuntimeProfile* profile,
+                  ShardedKVCache* kv_cache,
+                  const std::unordered_map<std::string, int>* colname_to_slot_id);
+
+    Status init(RuntimeState* state, const VExprContextSPtrs& conjuncts) override;
+    Status _open_impl(RuntimeState* state) override;
+    Status close(RuntimeState* state) override;
+    void try_stop() override;
+    std::string get_name() override { return FileScannerV2::NAME; }
+    std::string get_current_scan_range_name() override { return _current_range_path; }
+    void update_realtime_counters() override;
+
+protected:
+    Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override;
+    void _collect_profile_before_close() override;
+    bool _should_update_load_counters() const override;
+
+private:
+    TFileFormatType::type _get_current_format_type() const;
+    Status _init_io_ctx();
+    Status _init_expr_ctxes();
+    Status _prepare_next_split(bool* eos);
+    Status _init_table_reader(const TFileRangeDesc& range);
+    Status _create_table_reader_for_format(const TFileRangeDesc& range,
+                                           std::unique_ptr<format::TableReader>* reader) const;
+    Status _prepare_table_reader_split(const TFileRangeDesc& range);
+    bool _should_enable_file_meta_cache() const;
+    std::optional<format::GlobalRowIdContext> _create_global_rowid_context(
+            const TFileRangeDesc& range) const;
+    Status _generate_partition_values(const TFileRangeDesc& range,
+                                      std::map<std::string, Field>* partition_values) const;
+    Status _parse_partition_value(const SlotDescriptor* slot_desc, const std::string& value,
+                                  bool is_null, Field* field) const;
+    Status _build_projected_columns(const format::TableReader& table_reader);
+    Status _build_default_expr(const TFileScanSlotInfo& slot_info, VExprContextSPtr* ctx) const;
+    static format::ColumnDefinition _build_table_column(const SlotDescriptor* slot_desc);
+    Status _build_table_column_predicates(format::TableColumnPredicates* predicates) const;
+    Status _build_table_conjuncts(VExprContextSPtrs* conjuncts) const;
+    static Status _to_file_format(TFileFormatType::type format_type,
+                                  format::FileFormat* file_format);
+    void _reset_adaptive_batch_size_state();
+    void _init_adaptive_batch_size_state(TFileFormatType::type format_type);
+    bool _should_enable_adaptive_batch_size(TFileFormatType::type format_type) const;
+    bool _should_run_adaptive_batch_size() const;
+    size_t _predict_reader_batch_rows();
+    void _update_adaptive_batch_size(const Block& block);
+    void _report_file_reader_predicate_filtered_rows();
+    void _report_condition_cache_profile();
+
+    struct PartitionSlotInfo { // value type of _partition_slot_descs
+        const SlotDescriptor* slot_desc = nullptr;
+        std::string canonical_name;
+    };
+
+    const TFileScanRangeParams* _params = nullptr; // from the query ctx map or the split source
+    std::shared_ptr<SplitSourceConnector> _split_source;
+    bool _first_scan_range = false; // set by _open_impl() when it pre-fetched a range
+    bool _has_prepared_split = false; // a split is ready for get_block()
+    TFileRangeDesc _current_range; // range most recently fetched from _split_source
+    std::string _current_range_path; // reported by get_current_scan_range_name()
+
+    std::unique_ptr<format::TableReader> _table_reader;
+    std::vector<format::ColumnDefinition> _projected_columns;
+    // File formats without embedded schema, such as CSV, still need the FE slot descriptors in
+    // file-column order. This mirrors old FileScanner::_file_slot_descs and is passed only to
+    // readers that cannot derive their schema from file metadata.
+    std::vector<SlotDescriptor*> _file_slot_descs;
+    bool _need_global_rowid_column = false; // a projected column needs a global row id
+    std::unordered_map<int32_t, const SlotDescriptor*> _slot_id_to_desc; // output tuple slots
+    std::unordered_map<int32_t, format::GlobalIndex> _slot_id_to_global_index;
+    std::unordered_map<std::string, PartitionSlotInfo> _partition_slot_descs; // name or alias
+
+    std::unique_ptr<io::FileCacheStatistics> _file_cache_statistics;
+    std::unique_ptr<io::FileReaderStats> _file_reader_stats;
+    std::shared_ptr<io::IOContext> _io_ctx;
+    ShardedKVCache* _kv_cache = nullptr; // forwarded to prepare_split()
+
+    RuntimeProfile::Counter* _get_block_timer = nullptr;
+    RuntimeProfile::Counter* _file_counter = nullptr;
+    RuntimeProfile::Counter* _file_read_bytes_counter = nullptr;
+    RuntimeProfile::Counter* _file_read_calls_counter = nullptr;
+    RuntimeProfile::Counter* _file_read_time_counter = nullptr;
+    RuntimeProfile::Counter* _adaptive_batch_predicted_rows_counter = nullptr;
+    RuntimeProfile::Counter* _adaptive_batch_actual_bytes_counter = nullptr;
+    RuntimeProfile::Counter* _adaptive_batch_probe_count_counter = nullptr;
+    std::unique_ptr<AdaptiveBlockSizePredictor> _block_size_predictor; // null when disabled
+    int64_t _reported_predicate_filtered_rows = 0; // high-water marks for delta reporting
+    int64_t _reported_condition_cache_hit_count = 0;
+    int64_t _reported_condition_cache_filtered_rows = 0;
+};
+
+} // namespace doris
diff --git a/be/src/exec/scan/split_source_connector.h b/be/src/exec/scan/split_source_connector.h
index 5926baff303cbf..320f6f90d0dd02 100644
--- a/be/src/exec/scan/split_source_connector.h
+++ b/be/src/exec/scan/split_source_connector.h
@@ -17,6 +17,8 @@
 
 #pragma once
 
+#include <functional>
+
 #include "common/config.h"
 #include "core/custom_allocator.h"
 #include "runtime/runtime_state.h"
@@ -45,6 +47,15 @@ class SplitSourceConnector {
 
     virtual TFileScanRangeParams* get_params() = 0;
 
+    virtual bool all_scan_ranges_match(
+            [[maybe_unused]] const TFileScanRangeParams& params,
+            [[maybe_unused]] const std::function<bool(const TFileScanRangeParams&,
+                                                      const TFileRangeDesc&)>& predicate) {
+        // Base implementation inspects nothing and always answers false; connectors that
+        // hold their scan ranges locally override this.
+        return false;
+    }
+
 protected:
     template <typename T, typename V1 = std::vector<T>, typename V2 = std::vector<T>>
         requires(std::is_same_v<std::remove_cvref_t<V1>,
@@ -125,6 +136,24 @@ class LocalSplitSourceConnector : public SplitSourceConnector {
         throw Exception(
                 Status::FatalError("Unreachable, params is got by file_scan_range_params_map"));
     }
+
+    bool all_scan_ranges_match(
+            const TFileScanRangeParams& params,
+            const std::function<bool(const TFileScanRangeParams&, const TFileRangeDesc&)>&
+                    predicate) override {
+        // True only if at least one file range exists and all of them satisfy the predicate.
+        bool has_range = false;
+        for (const auto& scan_range : _scan_ranges) {
+            const auto& file_scan_range = scan_range.scan_range.ext_scan_range.file_scan_range;
+            for (const auto& range : file_scan_range.ranges) {
+                if (!predicate(params, range)) {
+                    return false;
+                }
+                has_range = true;
+            }
+        }
+        return has_range;
+    }
 };
 
 /**
diff --git a/be/src/exec/sink/writer/vhive_partition_writer.cpp b/be/src/exec/sink/writer/vhive_partition_writer.cpp
index 5e2582ceb5f8fc..8331efac54bd47 100644
--- a/be/src/exec/sink/writer/vhive_partition_writer.cpp
+++ b/be/src/exec/sink/writer/vhive_partition_writer.cpp
@@ -93,6 +93,8 @@ Status VHivePartitionWriter::open(RuntimeState* state, RuntimeProfile* operator_
                                          to_string(_hive_compress_type));
         }
         }
+        // TODO: INT96 is kept for Hive 2/3 compatibility. Add an explicit option before
+        // changing the default Hive parquet timestamp encoding to standard logical types.
         ParquetFileOptions parquet_options = {parquet_compression_type,
                                               TParquetVersion::PARQUET_1_0, false, true};
         _file_format_transformer = std::make_unique<VParquetTransformer>(
diff --git a/be/src/exprs/runtime_filter_expr.cpp b/be/src/exprs/runtime_filter_expr.cpp
index f0c1f67c12a6c3..584707fddc6a9a 100644
--- a/be/src/exprs/runtime_filter_expr.cpp
+++ b/be/src/exprs/runtime_filter_expr.cpp
@@ -68,6 +68,17 @@ RuntimeFilterExpr::RuntimeFilterExpr(const TExprNode& node, VExprSPtr impl, doub
     DORIS_CHECK(_impl != nullptr);
 }
 
+Status RuntimeFilterExpr::clone_node(VExprSPtr* cloned_expr) const {
+    DORIS_CHECK(cloned_expr != nullptr);
+    DORIS_CHECK(_impl != nullptr);
+    VExprSPtr cloned_impl; // receives a deep copy of the wrapped _impl subtree
+    RETURN_IF_ERROR(_impl->deep_clone(&cloned_impl));
+    *cloned_expr = RuntimeFilterExpr::create_shared(clone_texpr_node(), std::move(cloned_impl),
+                                                    _ignore_thredhold, _null_aware, _filter_id,
+                                                    _sampling_frequency);
+    return Status::OK(); // a deep_clone failure was already returned above
+}
+
 Status RuntimeFilterExpr::prepare(RuntimeState* state, const RowDescriptor& desc,
                                   VExprContext* context) {
     RETURN_IF_ERROR_OR_PREPARED(_impl->prepare(state, desc, context));
@@ -92,7 +103,7 @@ void RuntimeFilterExpr::close(VExprContext* context, FunctionContext::FunctionSt
 Status RuntimeFilterExpr::execute_column_impl(VExprContext* context, const Block* block,
                                               const Selector* selector, size_t count,
                                               ColumnPtr& result_column) const {
-    return Status::InternalError("Not implement RuntimeFilterExpr::execute_column_impl");
+    return _impl->execute_column(context, block, selector, count, result_column);
 }
 
 const std::string& RuntimeFilterExpr::expr_name() const {
diff --git a/be/src/exprs/runtime_filter_expr.h b/be/src/exprs/runtime_filter_expr.h
index efbe55878f24b9..7994d2a71ae14f 100644
--- a/be/src/exprs/runtime_filter_expr.h
+++ b/be/src/exprs/runtime_filter_expr.h
@@ -24,6 +24,7 @@
 #include <memory>
 #include <set>
 #include <string>
+#include <utility>
 
 #include "common/config.h"
 #include "common/status.h"
@@ -81,6 +82,8 @@ class RuntimeFilterExpr final : public VExpr {
     }
 
     VExprSPtr get_impl() const override { return _impl; }
+    void set_impl(VExprSPtr impl) { _impl = std::move(impl); } // NOTE(review): null not rejected
+    Status clone_node(VExprSPtr* cloned_expr) const override;
 
     void attach_profile_counter(std::shared_ptr<RuntimeProfile::Counter> rf_input_rows,
                                 std::shared_ptr<RuntimeProfile::Counter> rf_filter_rows,
@@ -117,6 +120,9 @@ class RuntimeFilterExpr final : public VExpr {
     std::shared_ptr<RuntimeProfile::Counter> predicate_always_true_rows_counter() const {
         return _always_true_filter_rows;
     }
+    bool is_slot_ref() const override { return false; } // false regardless of _node_type
+    bool is_virtual_slot_ref() const override { return false; }
+    bool is_column_ref() const override { return false; }
 
 private:
     VExprSPtr _impl;
diff --git a/be/src/exprs/short_circuit_evaluation_expr.h b/be/src/exprs/short_circuit_evaluation_expr.h
index 47a37b360c6e90..7240207aacad71 100644
--- a/be/src/exprs/short_circuit_evaluation_expr.h
+++ b/be/src/exprs/short_circuit_evaluation_expr.h
@@ -63,6 +63,13 @@ class ShortCircuitIfExpr final : public ShortCircuitExpr {
     ~ShortCircuitIfExpr() override = default;
 
     const std::string& expr_name() const override { return IF_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        node.__set_short_circuit_evaluation(true); // not set by clone_texpr_node()
+        *cloned_expr = ShortCircuitIfExpr::create_shared(node);
+        return Status::OK();
+    }
 
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
                                size_t count, ColumnPtr& result_column) const override;
@@ -76,6 +83,18 @@ class ShortCircuitCaseExpr final : public ShortCircuitExpr {
     ShortCircuitCaseExpr(const TExprNode& node);
     ~ShortCircuitCaseExpr() override = default;
     const std::string& expr_name() const override { return CASE_NAME; }
+    bool has_else_expr() const { return _has_else_expr; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        TCaseExpr case_node;
+        case_node.__set_has_case_expr(false); // fixed value; only has_else is copied
+        case_node.__set_has_else_expr(_has_else_expr);
+        node.__set_case_expr(case_node);
+        node.__set_short_circuit_evaluation(true); // not set by clone_texpr_node()
+        *cloned_expr = ShortCircuitCaseExpr::create_shared(node);
+        return Status::OK();
+    }
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
                                size_t count, ColumnPtr& result_column) const override;
 
@@ -91,6 +110,13 @@ class ShortCircuitIfNullExpr final : public ShortCircuitExpr {
     ~ShortCircuitIfNullExpr() override = default;
 
     const std::string& expr_name() const override { return IFNULL_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node(); // local copy so the flag below can be added
+        node.__set_short_circuit_evaluation(true);
+        *cloned_expr = ShortCircuitIfNullExpr::create_shared(node);
+        return Status::OK(); // VExpr::deep_clone() attaches the cloned children afterwards
+    }
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
                                size_t count, ColumnPtr& result_column) const override;
 
@@ -104,10 +130,17 @@ class ShortCircuitCoalesceExpr final : public ShortCircuitExpr {
     ShortCircuitCoalesceExpr(const TExprNode& node) : ShortCircuitExpr(node) {}
     ~ShortCircuitCoalesceExpr() override = default;
     const std::string& expr_name() const override { return COALESCE_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr); // the output slot is mandatory
+        auto node = clone_texpr_node();
+        node.__set_short_circuit_evaluation(true);
+        *cloned_expr = ShortCircuitCoalesceExpr::create_shared(node);
+        return Status::OK(); // this node only; operands are cloned by VExpr::deep_clone()
+    }
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
                                size_t count, ColumnPtr& result_column) const override;
 
 private:
     inline static const std::string COALESCE_NAME = "coalesce";
 };
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/exprs/vbloom_predicate.h b/be/src/exprs/vbloom_predicate.h
index f23bde0d9ad3fd..410bb5c8d370b3 100644
--- a/be/src/exprs/vbloom_predicate.h
+++ b/be/src/exprs/vbloom_predicate.h
@@ -59,6 +59,13 @@ class VBloomPredicate final : public VExpr {
     std::shared_ptr<BloomFilterFuncBase> get_bloom_filter_func() const override { return _filter; }
 
     uint64_t get_digest(uint64_t seed) const override;
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto cloned = VBloomPredicate::create_shared(clone_texpr_node());
+        cloned->set_filter(_filter); // hands this node's _filter pointer to the clone
+        *cloned_expr = std::move(cloned);
+        return Status::OK();
+    }
 
 private:
     Status _do_execute(VExprContext* context, const Block* block, const uint8_t* __restrict filter,
diff --git a/be/src/exprs/vcase_expr.h b/be/src/exprs/vcase_expr.h
index 97b2551091d100..6787283f0c5d23 100644
--- a/be/src/exprs/vcase_expr.h
+++ b/be/src/exprs/vcase_expr.h
@@ -59,6 +59,17 @@ class VCaseExpr final : public VExpr {
     void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
     const std::string& expr_name() const override;
     std::string debug_string() const override;
+    bool has_else_expr() const { return _has_else_expr; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        TCaseExpr case_node; // payload that clone_texpr_node() does not rebuild
+        case_node.__set_has_case_expr(false);
+        case_node.__set_has_else_expr(_has_else_expr);
+        node.__set_case_expr(case_node);
+        *cloned_expr = VCaseExpr::create_shared(node);
+        return Status::OK();
+    }
 
 private:
     template <typename IndexType, typename ColumnType>
diff --git a/be/src/exprs/vcast_expr.h b/be/src/exprs/vcast_expr.h
index c3f2526794b3b8..f0f3ead95d56af 100644
--- a/be/src/exprs/vcast_expr.h
+++ b/be/src/exprs/vcast_expr.h
@@ -57,6 +57,11 @@ class VCastExpr : public VExpr {
     const DataTypePtr& get_target_type() const;
 
     virtual std::string cast_name() const { return "CAST"; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VCastExpr::create_shared(clone_texpr_node());
+        return Status::OK(); // VExpr::deep_clone() attaches the cloned child afterwards
+    }
 
     uint64_t get_digest(uint64_t seed) const override {
         auto res = VExpr::get_digest(seed);
@@ -94,6 +99,13 @@ class TryCastExpr final : public VCastExpr {
                                size_t count, ColumnPtr& result_column) const override;
     ~TryCastExpr() override = default;
     std::string cast_name() const override { return "TRY CAST"; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        node.__set_is_cast_nullable(_original_cast_return_is_nullable); // not in the base copy
+        *cloned_expr = TryCastExpr::create_shared(node);
+        return Status::OK();
+    }
 
 private:
     DataTypePtr original_cast_return_type() const;
diff --git a/be/src/exprs/vcolumn_ref.h b/be/src/exprs/vcolumn_ref.h
index e4485e5815e02f..33ade77defaaba 100644
--- a/be/src/exprs/vcolumn_ref.h
+++ b/be/src/exprs/vcolumn_ref.h
@@ -81,6 +81,19 @@ class VColumnRef final : public VExpr {
         }
     }
 
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        TColumnRef column_ref; // payload that clone_texpr_node() does not rebuild
+        column_ref.__set_column_id(_column_id);
+        column_ref.__set_column_name(_column_name);
+        node.__set_column_ref(column_ref);
+        auto cloned = VColumnRef::create_shared(node);
+        cloned->set_gap(_gap.load()); // copies the gap value read at clone time
+        *cloned_expr = std::move(cloned);
+        return Status::OK();
+    }
+
     std::string debug_string() const override {
         std::stringstream out;
         out << "VColumnRef(slot_id: " << _column_id << ",column_name: " << _column_name
diff --git a/be/src/exprs/vcompound_pred.h b/be/src/exprs/vcompound_pred.h
index 9772efb90bc270..b703e30ea6c16f 100644
--- a/be/src/exprs/vcompound_pred.h
+++ b/be/src/exprs/vcompound_pred.h
@@ -60,6 +60,11 @@ class VCompoundPred : public VectorizedFnCall {
 #endif
 
     const std::string& expr_name() const override { return _expr_name; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VCompoundPred::create_shared(clone_texpr_node());
+        return Status::OK(); // opcode and fn travel in the copied node
+    }
 
     bool can_evaluate_zonemap_filter() const override {
         switch (_op) {
diff --git a/be/src/exprs/vcondition_expr.h b/be/src/exprs/vcondition_expr.h
index ceb6ed1396d78f..6beade74b3e3d6 100644
--- a/be/src/exprs/vcondition_expr.h
+++ b/be/src/exprs/vcondition_expr.h
@@ -65,6 +65,11 @@ class VectorizedIfExpr : public VConditionExpr {
                                size_t count, ColumnPtr& result_column) const override;
 
     const std::string& expr_name() const override { return IF_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VectorizedIfExpr::create_shared(clone_texpr_node());
+        return Status::OK(); // this node only; VExpr::deep_clone() clones the operands
+    }
     inline static const std::string IF_NAME = "if";
 
 protected:
@@ -123,6 +128,11 @@ class VectorizedIfNullExpr : public VectorizedIfExpr {
 public:
     VectorizedIfNullExpr(const TExprNode& node) : VectorizedIfExpr(node) {}
     const std::string& expr_name() const override { return IF_NULL_NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VectorizedIfNullExpr::create_shared(clone_texpr_node());
+        return Status::OK(); // overridden so the clone is not a plain VectorizedIfExpr
+    }
     inline static const std::string IF_NULL_NAME = "ifnull";
 
     Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
@@ -137,6 +147,11 @@ class VectorizedCoalesceExpr : public VConditionExpr {
                                size_t count, ColumnPtr& result_column) const override;
     VectorizedCoalesceExpr(const TExprNode& node) : VConditionExpr(node) {}
     const std::string& expr_name() const override { return NAME; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr); // the output slot is mandatory
+        *cloned_expr = VectorizedCoalesceExpr::create_shared(clone_texpr_node());
+        return Status::OK();
+    }
     inline static const std::string NAME = "coalesce";
 };
 
diff --git a/be/src/exprs/vdirect_in_predicate.h b/be/src/exprs/vdirect_in_predicate.h
index 21b729f140cc5c..2fd1e9a35febc7 100644
--- a/be/src/exprs/vdirect_in_predicate.h
+++ b/be/src/exprs/vdirect_in_predicate.h
@@ -46,7 +46,7 @@ class VDirectInPredicate final : public VExpr {
     // materialization and slot-IN rewrite that would otherwise rebuild child-typed literals from
     // dictionary codes.
     VDirectInPredicate(const TExprNode& node, const std::shared_ptr<HybridSetBase>& filter,
-                       bool hybrid_set_values_match_child_type)
+                       bool hybrid_set_values_match_child_type = true)
             : VExpr(node),
               _filter(filter),
               _hybrid_set_values_match_child_type(hybrid_set_values_match_child_type),
@@ -98,6 +98,13 @@ class VDirectInPredicate final : public VExpr {
                std::dynamic_pointer_cast<VSlotRef>(get_child(0)) != nullptr;
     }
 
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VDirectInPredicate::create_shared(clone_texpr_node(), _filter,
+                                                         _hybrid_set_values_match_child_type);
+        return Status::OK(); // the clone is handed the same _filter pointer, not a copy of the set
+    }
+
     bool get_slot_in_expr(VExprSPtr& new_root) const {
         if (!_hybrid_set_values_match_child_type) {
             return false;
diff --git a/be/src/exprs/vectorized_fn_call.cpp b/be/src/exprs/vectorized_fn_call.cpp
index 386edffb5e7e5e..8cc189f8b9939e 100644
--- a/be/src/exprs/vectorized_fn_call.cpp
+++ b/be/src/exprs/vectorized_fn_call.cpp
@@ -81,7 +81,9 @@ const static std::set<std::string> DISTANCE_FUNCS = {L2DistanceApproximate::name
 const static std::set<TExprOpcode::type> OPS_FOR_ANN_RANGE_SEARCH = {
         TExprOpcode::GE, TExprOpcode::LE, TExprOpcode::LE, TExprOpcode::GT, TExprOpcode::LT};
 
-VectorizedFnCall::VectorizedFnCall(const TExprNode& node) : VExpr(node) {}
+VectorizedFnCall::VectorizedFnCall(const TExprNode& node) : VExpr(node) {
+    _function_name = _fn.name.function_name; // presumably _fn was filled in by VExpr(node)
+}
 
 Status VectorizedFnCall::prepare(RuntimeState* state, const RowDescriptor& desc,
                                  VExprContext* context) {
diff --git a/be/src/exprs/vectorized_fn_call.h b/be/src/exprs/vectorized_fn_call.h
index c6e24c5377e48a..7bbb4303d41c47 100644
--- a/be/src/exprs/vectorized_fn_call.h
+++ b/be/src/exprs/vectorized_fn_call.h
@@ -101,6 +101,12 @@ class VectorizedFnCall : public VExpr {
                                   segment_v2::AnnRangeSearchRuntime& runtime,
                                   bool& suitable_for_ann_index) override;
 
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr); // NOTE(review): copy ctor slices subclasses
+        *cloned_expr = std::make_shared<VectorizedFnCall>(*this);
+        return Status::OK(); // the copy still shares _children until they are replaced
+    }
+
 protected:
     FunctionBasePtr _function;
     std::string _expr_name;
diff --git a/be/src/exprs/vexpr.cpp b/be/src/exprs/vexpr.cpp
index d88b18d5f022a5..c01d299f411a08 100644
--- a/be/src/exprs/vexpr.cpp
+++ b/be/src/exprs/vexpr.cpp
@@ -378,6 +378,51 @@ VExpr::VExpr(DataTypePtr type, bool is_slotref)
     }
 }
 
+TExprNode VExpr::clone_texpr_node() const {
+    TExprNode node; // anything not set below keeps its thrift default
+    node.__set_node_type(_node_type);
+    node.__set_opcode(_opcode);
+    node.__set_type(create_type_desc(remove_nullable(_data_type)->get_primitive_type(),
+                                     static_cast<int>(_data_type->get_precision()),
+                                     static_cast<int>(_data_type->get_scale())));
+    node.__set_is_nullable(_data_type->is_nullable());
+    node.__set_num_children(get_num_children()); // count only; no children are attached here
+    node.__set_fn(_fn);
+    return node;
+}
+
+Status VExpr::clone_node(VExprSPtr* cloned_expr) const {
+    DORIS_CHECK(cloned_expr != nullptr); // default: types without an override cannot be cloned
+    return Status::NotSupported("Cannot clone expression {} for file-local rewrite", expr_name());
+}
+
+Status VExpr::deep_clone(VExprSPtr* cloned_expr,
+                         const VExprCloneNodeOverride& clone_node_override) const {
+    DORIS_CHECK(cloned_expr != nullptr);
+    // Copy this node: the override goes first and may decline by leaving the result null.
+    VExprSPtr cloned;
+    if (clone_node_override) {
+        RETURN_IF_ERROR(clone_node_override(*this, &cloned));
+    }
+    if (!cloned) {
+        RETURN_IF_ERROR(clone_node(&cloned));
+    }
+    DORIS_CHECK(cloned != nullptr);
+    // Copy every child with the same override, then hang the copies off the new node.
+    VExprSPtrs child_copies;
+    child_copies.reserve(_children.size());
+    for (const auto& child : _children) {
+        DORIS_CHECK(child != nullptr);
+        VExprSPtr child_copy;
+        RETURN_IF_ERROR(child->deep_clone(&child_copy, clone_node_override));
+        child_copies.emplace_back(std::move(child_copy));
+    }
+    cloned->set_children(std::move(child_copies));
+    cloned->reset_prepare_state();
+    *cloned_expr = std::move(cloned);
+    return Status::OK();
+}
+
 Status VExpr::prepare(RuntimeState* state, const RowDescriptor& row_desc, VExprContext* context) {
     ++context->_depth_num;
     if (context->_depth_num > config::max_depth_of_expr_tree) {
@@ -407,6 +452,15 @@ Status VExpr::open(RuntimeState* state, VExprContext* context,
     return Status::OK();
 }
 
+void VExpr::reset_prepare_state() {
+    // Clear the prepare/open bookkeeping flags on this node, then do the same for every
+    // node in the subtree below it.
+    _prepared = _prepare_finished = _open_finished = false;
+    for (const auto& descendant : _children) {
+        descendant->reset_prepare_state();
+    }
+}
+
 void VExpr::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
     for (auto& i : _children) {
         i->close(context, scope);
@@ -757,8 +811,9 @@ Status VExpr::get_const_col(VExprContext* context,
         return Status::OK();
     }
 
-    if (_constant_col != nullptr) {
-        DCHECK(column_wrapper != nullptr);
+    if (_constant_col != nullptr && column_wrapper == nullptr) {
+        return Status::OK();
+    } else if (_constant_col != nullptr) {
         *column_wrapper = _constant_col;
         return Status::OK();
     }
diff --git a/be/src/exprs/vexpr.h b/be/src/exprs/vexpr.h
index 3b48fd3d2c60d7..5da5206d1b2aa7 100644
--- a/be/src/exprs/vexpr.h
+++ b/be/src/exprs/vexpr.h
@@ -24,6 +24,7 @@
 
 #include <cstddef>
 #include <cstdint>
+#include <functional>
 #include <memory>
 #include <ostream>
 #include <string>
@@ -81,6 +82,7 @@ struct AnnRangeSearchRuntime;
 // the relatioinship between threads and classes.
 
 using Selector = IColumn::Selector;
+using VExprCloneNodeOverride = std::function<Status(const VExpr&, VExprSPtr*)>;
 
 struct AnnRangeSearchEvaluationResult {
     // Indicates whether the expr row_bitmap has been updated.
@@ -215,11 +217,13 @@ class VExpr {
 
     const DataTypePtr& data_type() const { return _data_type; }
 
-    bool is_slot_ref() const { return _node_type == TExprNodeType::SLOT_REF; }
+    virtual bool is_slot_ref() const { return _node_type == TExprNodeType::SLOT_REF; }
 
-    bool is_virtual_slot_ref() const { return _node_type == TExprNodeType::VIRTUAL_SLOT_REF; }
+    virtual bool is_virtual_slot_ref() const {
+        return _node_type == TExprNodeType::VIRTUAL_SLOT_REF;
+    }
 
-    bool is_column_ref() const { return _node_type == TExprNodeType::COLUMN_REF; }
+    virtual bool is_column_ref() const { return _node_type == TExprNodeType::COLUMN_REF; }
 
     virtual bool is_literal() const { return false; }
 
@@ -253,6 +257,10 @@ class VExpr {
 
     static bool contains_blockable_function(const VExprContextSPtrs& ctxs);
 
+    Status deep_clone(VExprSPtr* cloned_expr,
+                      const VExprCloneNodeOverride& clone_node_override = {}) const;
+    virtual Status clone_node(VExprSPtr* cloned_expr) const;
+
     bool is_nullable() const { return _data_type->is_nullable(); }
 
     PrimitiveType result_type() const { return _data_type->get_primitive_type(); }
@@ -267,6 +275,7 @@ class VExpr {
     virtual const VExprSPtrs& children() const { return _children; }
     void set_children(const VExprSPtrs& children) { _children = children; }
     void set_children(VExprSPtrs&& children) { _children = std::move(children); }
+    void reset_prepare_state();
     virtual std::string debug_string() const;
     static std::string debug_string(const VExprSPtrs& exprs);
     static std::string debug_string(const VExprContextSPtrs& ctxs);
@@ -274,7 +283,7 @@ class VExpr {
     static ColumnPtr filter_column_with_selector(const ColumnPtr& origin_column,
                                                  const Selector* selector, size_t count) {
         if (selector == nullptr) {
-            DCHECK_EQ(origin_column->size(), count);
+            DCHECK_EQ(origin_column->size(), count) << origin_column->get_name();
             return origin_column;
         }
         DCHECK_EQ(count, selector->size());
@@ -368,6 +377,8 @@ class VExpr {
     virtual uint64_t get_digest(uint64_t seed) const;
 
 protected:
+    TExprNode clone_texpr_node() const;
+
     /// Simple debug string that provides no expr subclass-specific information
     std::string debug_string(const std::string& expr_name) const {
         std::stringstream out;
diff --git a/be/src/exprs/vin_predicate.h b/be/src/exprs/vin_predicate.h
index 6cf3858768264c..6830f1bc4c2c20 100644
--- a/be/src/exprs/vin_predicate.h
+++ b/be/src/exprs/vin_predicate.h
@@ -64,6 +64,15 @@ class VInPredicate MOCK_REMOVE(final) : public VExpr {
     bool can_evaluate_zonemap_filter() const override;
 
     uint64_t get_digest(uint64_t seed) const override { return 0; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        auto node = clone_texpr_node();
+        TInPredicate in_predicate;
+        in_predicate.__set_is_not_in(_is_not_in); // the only IN-specific field copied here
+        node.__set_in_predicate(in_predicate);
+        *cloned_expr = VInPredicate::create_shared(node);
+        return Status::OK();
+    }
 
 private:
     Status _materialize_for_zonemap_filter(VExprContext* context);
diff --git a/be/src/exprs/vliteral.cpp b/be/src/exprs/vliteral.cpp
index 551839f699e2e6..9b93d7097274ee 100644
--- a/be/src/exprs/vliteral.cpp
+++ b/be/src/exprs/vliteral.cpp
@@ -37,12 +37,6 @@ namespace doris {
 
 class VExprContext;
 
-void VLiteral::init(const TExprNode& node) {
-    Field field;
-    field = _data_type->get_field(node);
-    _column_ptr = _data_type->create_column_const(1, field);
-}
-
 Status VLiteral::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) {
     RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
     return Status::OK();
diff --git a/be/src/exprs/vliteral.h b/be/src/exprs/vliteral.h
index b1b8e89157d420..89988e2ba31142 100644
--- a/be/src/exprs/vliteral.h
+++ b/be/src/exprs/vliteral.h
@@ -24,6 +24,7 @@
 #include "common/status.h"
 #include "core/data_type/data_type.h"
 #include "core/data_type_serde/data_type_serde.h"
+#include "core/field.h"
 #include "exprs/vexpr.h"
 
 namespace doris {
@@ -39,10 +40,19 @@ class VLiteral : public VExpr {
     VLiteral(const TExprNode& node, bool should_init = true)
             : VExpr(node), _expr_name(_data_type->get_name()) {
         if (should_init) {
-            init(node);
+            Field field;
+            field = _data_type->get_field(node);
+            _column_ptr = _data_type->create_column_const(1, field);
         }
     }
 
+    VLiteral(const DataTypePtr& type, const Field& field) : VExpr(type, false) {
+        _data_type = type; // NOTE(review): possibly redundant with VExpr(type, false)
+        _column_ptr = _data_type->create_column_const(1, field);
+        _node_type = TExprNodeType::LITERAL;
+        _expr_name = _data_type->get_name(); // same naming as the TExprNode constructor
+    }
+
 #ifdef BE_TEST
     VLiteral() = default;
     MOCK_FUNCTION std::string value() const;
@@ -67,13 +77,18 @@ class VLiteral : public VExpr {
     bool equals(const VExpr& other) override;
 
     uint64_t get_digest(uint64_t seed) const override;
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        Field field;
+        _column_ptr->get(0, field); // assumes _column_ptr is non-null; TODO confirm
+        *cloned_expr = VLiteral::create_shared(_data_type, field);
+        return Status::OK(); // result is always a plain VLiteral built from type + value
+    }
 
 protected:
+    VLiteral(const DataTypePtr& type) : VExpr(type, false) {}
     ColumnPtr _column_ptr;
     std::string _expr_name;
-
-private:
-    void init(const TExprNode& node);
 };
 
 } // namespace doris
diff --git a/be/src/exprs/vslot_ref.cpp b/be/src/exprs/vslot_ref.cpp
index 87aad6b977ecbe..f02ef50d5751c3 100644
--- a/be/src/exprs/vslot_ref.cpp
+++ b/be/src/exprs/vslot_ref.cpp
@@ -41,10 +41,28 @@ VSlotRef::VSlotRef(const doris::TExprNode& node)
 VSlotRef::VSlotRef(const SlotDescriptor* desc)
         : VExpr(desc->type(), true), _slot_id(desc->id()), _column_id(-1), _column_name(nullptr) {}
 
+VSlotRef::VSlotRef(int slot_id, int column_id, int column_uniq_id, const DataTypePtr& type,
+                   std::string column_name)
+        : VExpr(type, true),
+          _slot_id(slot_id),
+          _column_id(column_id),
+          _column_uniq_id(column_uniq_id),
+          _owned_column_name(std::move(column_name)),
+          _column_name(&_owned_column_name) {} // points at this object's own copy of the name
+
 Status VSlotRef::prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
                          VExprContext* context) {
-    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
     DCHECK_EQ(_children.size(), 0);
+    if (_prepared) {
+        return Status::OK();
+    }
+    if (_column_id >= 0 && _column_name != nullptr) {
+        _prepared = true;
+        _prepare_finished = true;
+        return Status::OK();
+    }
+    _prepared = true;
+    RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
     if (_slot_id == -1) {
         _prepare_finished = true;
         return Status::OK();
@@ -109,6 +127,27 @@ DataTypePtr VSlotRef::execute_type(const Block* block) const {
     return block->get_by_position(_column_id).type;
 }
 
+Status VSlotRef::clone_node(VExprSPtr* cloned_expr) const {
+    DORIS_CHECK(cloned_expr != nullptr);
+    if (_column_name != nullptr && (_column_id >= 0 || _column_name == &_owned_column_name)) {
+        *cloned_expr = VSlotRef::create_shared(_slot_id, _column_id, _column_uniq_id, _data_type,
+                                               *_column_name);
+        return Status::OK();
+    }
+    auto node = clone_texpr_node();
+    TSlotRef slot_ref;
+    slot_ref.__set_slot_id(_slot_id);
+    node.__set_slot_ref(slot_ref);
+    node.__set_label(_column_label);
+    auto cloned = VSlotRef::create_shared(node);
+    // Past the guard above _column_name never aliases _owned_column_name, so copy it as is.
+    cloned->_column_id = _column_id;
+    cloned->_column_uniq_id = _column_uniq_id;
+    cloned->_column_name = _column_name;
+    *cloned_expr = std::move(cloned);
+    return Status::OK();
+}
+
 const std::string& VSlotRef::expr_name() const {
     return *_column_name;
 }
diff --git a/be/src/exprs/vslot_ref.h b/be/src/exprs/vslot_ref.h
index ef61edc384c2f2..a67bdc1953cd0a 100644
--- a/be/src/exprs/vslot_ref.h
+++ b/be/src/exprs/vslot_ref.h
@@ -31,12 +31,14 @@ class TExprNode;
 class Block;
 class VExprContext;
 
-class VSlotRef MOCK_REMOVE(final) : public VExpr {
+class VSlotRef : public VExpr {
     ENABLE_FACTORY_CREATOR(VSlotRef);
 
 public:
     VSlotRef(const TExprNode& node);
     VSlotRef(const SlotDescriptor* desc);
+    VSlotRef(int slot_id, int column_id, int column_uniq_id, const DataTypePtr& type,
+             std::string column_name);
 #ifdef BE_TEST
     VSlotRef() = default;
     void set_slot_id(int slot_id) { _slot_id = slot_id; }
@@ -58,6 +60,7 @@ class VSlotRef MOCK_REMOVE(final) : public VExpr {
     int column_id() const { return _column_id; }
 
     MOCK_FUNCTION int slot_id() const { return _slot_id; }
+    int column_uniq_id() const { return _column_uniq_id; }
 
     bool equals(const VExpr& other) override;
 
@@ -67,16 +70,24 @@ class VSlotRef MOCK_REMOVE(final) : public VExpr {
         column_ids.insert(_column_id);
     }
 
-    MOCK_FUNCTION const std::string& column_name() const { return *_column_name; }
+    virtual const std::string& column_name() const { return *_column_name; }
 
     uint64_t get_digest(uint64_t seed) const override;
 
     double execute_cost() const override { return 0.0; }
+    Status clone_node(VExprSPtr* cloned_expr) const override;
+
+protected:
+    VSlotRef(int slot_id, int column_id, int column_uniq_id)
+            : _slot_id(slot_id), _column_id(column_id), _column_uniq_id(column_uniq_id) {
+        _node_type = TExprNodeType::SLOT_REF;
+    }
 
 private:
     int _slot_id;
     int _column_id;
     int _column_uniq_id = -1;
+    std::string _owned_column_name;
     const std::string* _column_name = nullptr;
     const std::string _column_label;
 };
diff --git a/be/src/exprs/vtopn_pred.h b/be/src/exprs/vtopn_pred.h
index 94887588f536da..a6edec65accd3d 100644
--- a/be/src/exprs/vtopn_pred.h
+++ b/be/src/exprs/vtopn_pred.h
@@ -63,6 +63,11 @@ class VTopNPred : public VExpr {
     }
 
     int source_node_id() const { return _source_node_id; }
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = VTopNPred::create_shared(clone_texpr_node(), _source_node_id, nullptr);
+        return Status::OK();
+    }
 
     Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override {
         _predicate = &state->get_query_ctx()->get_runtime_predicate(_source_node_id);
diff --git a/be/src/format/CMakeLists.txt b/be/src/format/CMakeLists.txt
index ef9dab92c00f97..bc0325f3e0f252 100644
--- a/be/src/format/CMakeLists.txt
+++ b/be/src/format/CMakeLists.txt
@@ -22,6 +22,9 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/format")
 set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/format")
 
 file(GLOB_RECURSE SRC_FILES CONFIGURE_DEPENDS *.cpp)
+file(GLOB_RECURSE FORMAT_V2_SRC_FILES CONFIGURE_DEPENDS
+    ${CMAKE_CURRENT_SOURCE_DIR}/../format_v2/*.cpp)
+list(APPEND SRC_FILES ${FORMAT_V2_SRC_FILES})
 
 # Lance reader requires Rust static library (BUILD_RUST_READERS=ON)
 if (NOT BUILD_RUST_READERS)
diff --git a/be/src/format/csv/csv_reader.cpp b/be/src/format/csv/csv_reader.cpp
index 3d1e978ffe911f..b8f0be49bfea1e 100644
--- a/be/src/format/csv/csv_reader.cpp
+++ b/be/src/format/csv/csv_reader.cpp
@@ -668,8 +668,8 @@ Status CsvReader::_create_file_reader(bool need_schema) {
                                                         need_schema));
     } else {
         _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0;
-        io::FileReaderOptions reader_options =
-                FileFactory::get_reader_options(_state, _file_description);
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+                _state ? _state->query_options() : _default_query_options, _file_description);
         io::FileReaderSPtr file_reader;
         if (_io_ctx_holder) {
             file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
diff --git a/be/src/format/generic_reader.h b/be/src/format/generic_reader.h
index d849d595056adb..88fc3fb85a0eb1 100644
--- a/be/src/format/generic_reader.h
+++ b/be/src/format/generic_reader.h
@@ -40,6 +40,7 @@
 #include "runtime/runtime_state.h"
 #include "storage/predicate/block_column_predicate.h"
 #include "storage/segment/common.h"
+#include "storage/segment/condition_cache.h"
 #include "util/profile_collector.h"
 
 namespace doris {
@@ -51,16 +52,6 @@ namespace doris {
 class Block;
 class VSlotRef;
 
-// Context passed from FileScanner to readers for condition cache integration.
-// On MISS: readers populate filter_result per-granule during predicate evaluation.
-// On HIT: readers skip granules where filter_result[granule] == false.
-struct ConditionCacheContext {
-    bool is_hit = false;
-    std::shared_ptr<std::vector<bool>> filter_result; // per-granule: true = has surviving rows
-    int64_t base_granule = 0; // global granule index of the first granule in filter_result
-    static constexpr int GRANULE_SIZE = 2048;
-};
-
 /// Base context for the unified init_reader(ReaderInitContext*) template method.
 /// Contains fields shared by ALL reader types. Format-specific readers define
 /// subclasses (ParquetInitContext, OrcInitContext, etc.) with extra fields.
@@ -299,6 +290,9 @@ class GenericReader : public ProfileCollector {
     // ---- get_columns cache ----
     bool _get_columns_cached = false;
     std::unordered_map<std::string, DataTypePtr> _cached_name_to_type;
+    // Default query options used by readers created without a RuntimeState. The value is
+    // immutable, so one shared instance is enough for all readers.
+    static inline const TQueryOptions _default_query_options {};
 };
 
 /// Provides an accessor for the current batch's row positions within the file.
diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp
index 8d53b6009e6bef..1aa19574b39a58 100644
--- a/be/src/format/json/new_json_reader.cpp
+++ b/be/src/format/json/new_json_reader.cpp
@@ -498,8 +498,8 @@ Status NewJsonReader::_open_file_reader(bool need_schema) {
                                                         need_schema));
     } else {
         _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0;
-        io::FileReaderOptions reader_options =
-                FileFactory::get_reader_options(_state, _file_description);
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+                _state ? _state->query_options() : _default_query_options, _file_description);
         io::FileReaderSPtr file_reader;
         if (_io_ctx_holder) {
             file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
diff --git a/be/src/format/native/native_reader.cpp b/be/src/format/native/native_reader.cpp
index 029d7ff2024f20..3632b6e4e0a1c9 100644
--- a/be/src/format/native/native_reader.cpp
+++ b/be/src/format/native/native_reader.cpp
@@ -137,8 +137,8 @@ Status NativeReader::init_reader() {
                                                   _scan_params.broker_addresses.end());
     }
 
-    io::FileReaderOptions reader_options =
-            FileFactory::get_reader_options(_state, file_description);
+    io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+            _state ? _state->query_options() : _default_query_options, file_description);
     auto reader_res =
             _io_ctx_holder ? io::DelegateReader::create_file_reader(
                                      _profile, system_properties, file_description, reader_options,
diff --git a/be/src/format/orc/vorc_reader.cpp b/be/src/format/orc/vorc_reader.cpp
index 54ced60d6e0361..80dc857ddf4bc3 100644
--- a/be/src/format/orc/vorc_reader.cpp
+++ b/be/src/format/orc/vorc_reader.cpp
@@ -382,8 +382,8 @@ Status OrcReader::_create_file_reader() {
     if (_file_input_stream == nullptr) {
         _file_description.mtime =
                 _scan_range.__isset.modification_time ? _scan_range.modification_time : 0;
-        io::FileReaderOptions reader_options =
-                FileFactory::get_reader_options(_state, _file_description);
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+                _state ? _state->query_options() : _default_query_options, _file_description);
         io::FileReaderSPtr inner_reader;
         if (_io_ctx_holder != nullptr) {
             inner_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
diff --git a/be/src/format/parquet/vparquet_reader.cpp b/be/src/format/parquet/vparquet_reader.cpp
index 48dd987c44f83a..11758c76410784 100644
--- a/be/src/format/parquet/vparquet_reader.cpp
+++ b/be/src/format/parquet/vparquet_reader.cpp
@@ -325,8 +325,8 @@ Status ParquetReader::_open_file() {
         ++_reader_statistics.open_file_num;
         _file_description.mtime =
                 _scan_range.__isset.modification_time ? _scan_range.modification_time : 0;
-        io::FileReaderOptions reader_options =
-                FileFactory::get_reader_options(_state, _file_description);
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+                _state ? _state->query_options() : _default_query_options, _file_description);
         if (_io_ctx_holder) {
             _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
                     _profile, _system_properties, _file_description, reader_options,
diff --git a/be/src/format/table/deletion_vector_reader.cpp b/be/src/format/table/deletion_vector_reader.cpp
index bfe34a5f555f94..d7e33c923d95b7 100644
--- a/be/src/format/table/deletion_vector_reader.cpp
+++ b/be/src/format/table/deletion_vector_reader.cpp
@@ -54,9 +54,12 @@ Status DeletionVectorReader::_create_file_reader() {
         return Status::EndOfFile("stop read.");
     }
 
-    _file_description.mtime = _range.__isset.modification_time ? _range.modification_time : 0;
-    io::FileReaderOptions reader_options =
-            FileFactory::get_reader_options(_state, _file_description);
+    _file_description.mtime = _desc.modification_time;
+    // Use default query options when no RuntimeState was supplied instead of dereferencing a
+    // null pointer.
+    static const TQueryOptions default_query_options;
+    io::FileReaderOptions reader_options = FileFactory::get_reader_options(
+            _state ? _state->query_options() : default_query_options, _file_description);
     _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
             _profile, _system_properties, _file_description, reader_options,
             io::DelegateReader::AccessMode::RANDOM, _io_ctx));
@@ -64,20 +67,15 @@ Status DeletionVectorReader::_create_file_reader() {
 }
 
 void DeletionVectorReader::_init_file_description() {
-    _file_description.path = _range.path;
-    _file_description.file_size = _range.__isset.file_size ? _range.file_size : -1;
-    if (_range.__isset.fs_name) {
-        _file_description.fs_name = _range.fs_name;
-    }
+    _file_description.path = _desc.path;
+    _file_description.file_size = _desc.file_size;
+    _file_description.fs_name = _desc.fs_name;
 }
 
 void DeletionVectorReader::_init_system_properties() {
-    if (_range.__isset.file_type) {
-        // for compatibility
-        _system_properties.system_type = _range.file_type;
-    } else {
-        _system_properties.system_type = _params.file_type;
-    }
+    // NOTE(review): this previously preferred TFileRangeDesc::file_type when it was set; only
+    // the scan-params value is used now. Confirm no caller depends on the per-range override.
+    _system_properties.system_type = _params.file_type;
     _system_properties.properties = _params.properties;
     _system_properties.hdfs_params = _params.hdfs_params;
     if (_params.__isset.broker_addresses) {
diff --git a/be/src/format/table/deletion_vector_reader.h b/be/src/format/table/deletion_vector_reader.h
index 0663f3b28490ef..968344a8496bc7 100644
--- a/be/src/format/table/deletion_vector_reader.h
+++ b/be/src/format/table/deletion_vector_reader.h
@@ -36,6 +36,22 @@ struct IOContext;
 } // namespace io
 
 namespace doris {
+struct DeleteFileDesc {
+    enum class Format {
+        PAIMON,
+        ICEBERG,
+    };
+
+    std::string key = "";
+    std::string path = "";
+    std::string fs_name = "";
+    int64_t start_offset = 0;
+    int64_t size = 0;
+    int64_t file_size = -1;
+    int64_t modification_time = 0;
+    Format format = Format::PAIMON;
+};
+
 class DeletionVectorReader {
     ENABLE_FACTORY_CREATOR(DeletionVectorReader);
 
@@ -43,7 +59,22 @@ class DeletionVectorReader {
     DeletionVectorReader(RuntimeState* state, RuntimeProfile* profile,
                          const TFileScanRangeParams& params, const TFileRangeDesc& range,
                          io::IOContext* io_ctx)
-            : _state(state), _profile(profile), _range(range), _params(params), _io_ctx(io_ctx) {}
+            : _state(state), _profile(profile), _params(params), _io_ctx(io_ctx) {
+        _desc = DeleteFileDesc {
+                .key = "",
+                .path = range.path,
+                .fs_name = range.__isset.fs_name ? range.fs_name : "",
+                .start_offset = range.start_offset,
+                .size = range.size,
+                .file_size = range.__isset.file_size ? range.file_size : -1,
+                .modification_time = range.__isset.modification_time ? range.modification_time : 0};
+    }
+    DeletionVectorReader(RuntimeState* state, RuntimeProfile* profile,
+                         const TFileScanRangeParams& params, const DeleteFileDesc& desc,
+                         io::IOContext* io_ctx)
+            : _state(state), _profile(profile), _params(params), _io_ctx(io_ctx) {
+        _desc = desc;
+    }
     ~DeletionVectorReader() = default;
     Status open();
     Status read_at(size_t offset, Slice result);
@@ -56,7 +87,7 @@ class DeletionVectorReader {
 private:
     RuntimeState* _state = nullptr;
     RuntimeProfile* _profile = nullptr;
-    const TFileRangeDesc& _range;
+    DeleteFileDesc _desc;
     const TFileScanRangeParams& _params;
     io::IOContext* _io_ctx = nullptr;
 
diff --git a/be/src/format/table/iceberg_reader_mixin.h b/be/src/format/table/iceberg_reader_mixin.h
index bd049342195695..2bc15f18cf141a 100644
--- a/be/src/format/table/iceberg_reader_mixin.h
+++ b/be/src/format/table/iceberg_reader_mixin.h
@@ -343,9 +343,6 @@ class IcebergReaderMixin : public BaseReader, public TableSchemaChangeHelper {
     // id -> block column name
     std::unordered_map<int, std::string> _id_to_block_column_name;
 
-    // File column names used during init
-    std::vector<std::string> _file_col_names;
-
     std::function<std::shared_ptr<segment_v2::RowIdColumnIteratorV2>()>
             _create_topn_row_id_column_iterator;
 
diff --git a/be/src/format_v2/column_data.h b/be/src/format_v2/column_data.h
new file mode 100644
index 00000000000000..7816ea8263cb42
--- /dev/null
+++ b/be/src/format_v2/column_data.h
@@ -0,0 +1,410 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "exprs/vexpr_fwd.h"
+
+namespace doris::format {
+
+// File-local top-level column id.
+//
+// Scope:
+// - Only valid inside one physical file schema returned by FileReader::get_schema().
+// - For Parquet, this is the top-level field ordinal in the new reader schema.
+// - The synthetic row-position column also uses this type, with a reserved negative id.
+//
+// Do not use this for table/global column unique ids, block positions, nested child ids, or
+// slot ids. Nested child ids are carried by LocalColumnIndex::index below.
+class LocalColumnId {
+public:
+    constexpr LocalColumnId() = default;
+    explicit constexpr LocalColumnId(int32_t id) : _id(id) {}
+
+    static constexpr LocalColumnId invalid() { return LocalColumnId(); }
+
+    constexpr int32_t value() const { return _id; }
+    constexpr bool is_valid() const { return _id >= 0; }
+
+    constexpr bool operator==(const LocalColumnId& other) const { return _id == other._id; }
+    constexpr bool operator!=(const LocalColumnId& other) const { return !(*this == other); }
+    constexpr bool operator<(const LocalColumnId& other) const { return _id < other._id; }
+
+private:
+    int32_t _id = -1;
+};
+
+// Position of a file-local column in the Block produced by one FileScanRequest.
+//
+// This is assigned by TableColumnMapper/TableReader after predicate/non-predicate columns are
+// deduplicated. It is not a file schema id and it is not stable across requests. Use value() only
+// at the boundary where an existing Block or expression API still expects a size_t/int position.
+class LocalIndex {
+public:
+    constexpr LocalIndex() = default;
+    explicit constexpr LocalIndex(size_t index) : _index(index) {}
+
+    constexpr size_t value() const { return _index; }
+    constexpr bool operator==(const LocalIndex& other) const { return _index == other._index; }
+    constexpr bool operator<(const LocalIndex& other) const { return _index < other._index; }
+
+private:
+    size_t _index = 0;
+};
+
+// Position of a table/global output column in the final Block returned by TableReader.
+//
+// This type is reserved for boundaries that need to refer to caller-visible column order. It must
+// not be used to index a file-local Block, because schema evolution and lazy materialization can
+// make file-local order different from table output order.
+class GlobalIndex {
+public:
+    constexpr GlobalIndex() = default;
+    explicit constexpr GlobalIndex(size_t index) : _index(index) {}
+
+    constexpr size_t value() const { return _index; }
+    constexpr bool operator==(const GlobalIndex& other) const { return _index == other._index; }
+    constexpr bool operator<(const GlobalIndex& other) const { return _index < other._index; }
+
+private:
+    size_t _index = 0;
+};
+
+// Index of a split-local constant/default value used to materialize columns that are not read from
+// the physical file, such as partition columns, added columns with default values, and virtual
+// table-format columns.
+//
+// It is separate from LocalIndex because constants do not occupy a position in the file reader
+// output block unless an expression explicitly materializes them.
+class ConstantIndex {
+public:
+    constexpr ConstantIndex() = default;
+    explicit constexpr ConstantIndex(size_t index) : _index(index) {}
+
+    constexpr size_t value() const { return _index; }
+    constexpr bool operator==(const ConstantIndex& other) const { return _index == other._index; }
+    constexpr bool operator<(const ConstantIndex& other) const { return _index < other._index; }
+
+private:
+    size_t _index = 0;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const LocalColumnId& id) {
+    return os << id.value();
+}
+
+inline std::ostream& operator<<(std::ostream& os, const LocalIndex& index) {
+    return os << index.value();
+}
+
+inline std::ostream& operator<<(std::ostream& os, const GlobalIndex& index) {
+    return os << index.value();
+}
+
+inline std::ostream& operator<<(std::ostream& os, const ConstantIndex& index) {
+    return os << index.value();
+}
+
+// A split/file-local constant value used to materialize a table/global column without reading a
+// physical file column.
+//
+// Common producers are partition values, schema-evolution default expressions, generated columns
+// and table-format virtual columns. The entry is keyed by ConstantIndex in ConstantMap; global_index
+// keeps the link back to the caller-visible output column.
+struct ConstantEntry {
+    GlobalIndex global_index;
+    VExprContextSPtr expr;
+    DataTypePtr type;
+};
+
+// Per mapping/split collection of constants.
+//
+// ConstantIndex only has meaning within this container. Keeping constants separate from LocalIndex
+// makes it explicit that these values do not occupy positions in the file reader output Block.
+class ConstantMap {
+public:
+    ConstantIndex add(ConstantEntry entry) {
+        const auto index = ConstantIndex(_entries.size());
+        _entries.push_back(std::move(entry));
+        return index;
+    }
+
+    const ConstantEntry& get(ConstantIndex index) const {
+        DORIS_CHECK(index.value() < _entries.size());
+        return _entries[index.value()];
+    }
+
+    void clear() { _entries.clear(); }
+    bool empty() const { return _entries.empty(); }
+    size_t size() const { return _entries.size(); }
+
+    const std::vector<ConstantEntry>& entries() const { return _entries; }
+
+private:
+    std::vector<ConstantEntry> _entries;
+};
+
+// Target of a localized filter.
+//
+// A filter can either reference a file-local Block position or a constant entry. Unset entries mean
+// the filter cannot be evaluated below the table-reader finalize stage.
+struct FilterEntry {
+    enum class Kind {
+        UNSET,
+        LOCAL,
+        CONSTANT,
+    };
+
+    static FilterEntry local(LocalIndex index) {
+        return {.kind = Kind::LOCAL, .index = index.value()};
+    }
+
+    static FilterEntry constant(ConstantIndex index) {
+        return {.kind = Kind::CONSTANT, .index = index.value()};
+    }
+
+    bool is_set() const { return kind != Kind::UNSET; }
+    bool is_local() const { return kind == Kind::LOCAL; }
+    bool is_constant() const { return kind == Kind::CONSTANT; }
+
+    LocalIndex local_index() const {
+        DORIS_CHECK(is_local());
+        return LocalIndex(index);
+    }
+
+    ConstantIndex constant_index() const {
+        DORIS_CHECK(is_constant());
+        return ConstantIndex(index);
+    }
+
+    Kind kind = Kind::UNSET;
+    size_t index = 0;
+};
+
+enum ColumnType {
+    DATA_COLUMN = 0,  // normal data column
+    ROW_NUMBER = 1,   // row number in a file
+    GLOBAL_ROWID = 2, // global unique row id across files, used by TopN filter
+};
+
+struct GlobalRowIdContext {
+    uint8_t version = 0;
+    int64_t backend_id = 0;
+    uint32_t file_id = 0;
+};
+
+// Column schema definition shared by table/global projection and file-local schema matching.
+//
+// ColumnDefinition intentionally carries schema identity only. FE column unique ids are translated
+// to GlobalIndex at the FileScannerV2 boundary and must not appear in table/file reader APIs.
+struct ColumnDefinition {
+    // Typed identifier value used to match a column against another schema.
+    //
+    // - TYPE_NULL: no explicit identifier. BY_NAME falls back to ColumnDefinition::name.
+    // - TYPE_INT: interpreted by TableColumnMapperOptions::mode as a field id or file position.
+    // - TYPE_STRING: explicit name identifier.
+    //
+    // This is not the id that FileReader uses to read data. For example, a Parquet column can be
+    // matched by its optional Parquet field_id, while the reader still addresses it by a file-local
+    // ordinal.
+    Field identifier;
+    // Reader-local id of this node inside the file schema returned by FileReader::get_schema().
+    // Top-level fields use the root column ordinal and nested fields use the child ordinal under
+    // their parent. -1 means unset; special virtual file columns may use other negative ids.
+    // Table/global ColumnDefinition values can leave this as -1 because they are not read directly
+    // by a FileReader.
+    int32_t local_id = -1;
+    // Logical table column name. This is also the matching name for by-name file formats.
+    std::string name;
+    // Historical or external names for the same logical field. Table formats such as Iceberg can
+    // use this to resolve partition path keys after column rename.
+    std::vector<std::string> name_mapping {};
+    DataTypePtr type;
+    // Semantic nested children for this schema node.
+    //
+    // Table/global columns carry projected table children. File-local schemas returned by
+    // FileReader::get_schema() also expose semantic children, not physical reader wrappers. For
+    // example, MAP children are key/value and ARRAY children contain only the element field.
+    std::vector<ColumnDefinition> children {};
+    // Expression used to materialize missing/default/generated values when the column is not read
+    // directly from the file.
+    VExprContextSPtr default_expr = nullptr;
+    // Partition columns are constants from split metadata and should not be matched against file
+    // schema unless table-format logic explicitly asks for it.
+    bool is_partition_key = false;
+    // File-local column kind. For table/global columns this remains DATA_COLUMN.
+    ColumnType column_type = ColumnType::DATA_COLUMN;
+
+    bool has_identifier() const { return !identifier.is_null(); }
+    bool has_identifier_field_id() const { return identifier.get_type() == TYPE_INT; }
+    bool has_identifier_name() const { return identifier.get_type() == TYPE_STRING; }
+
+    // DuckDB-style helper for BY_FIELD_ID matching. The mapper binds the matching mode once, so a
+    // TYPE_INT identifier is interpreted as a field id only by the field-id matcher.
+    int32_t get_identifier_field_id() const {
+        DORIS_CHECK(has_identifier_field_id());
+        return identifier.get<TYPE_INT>();
+    }
+    // DuckDB-style helper for BY_NAME matching. When no explicit string identifier is present, the
+    // logical column name is the identifier.
+    const std::string& get_identifier_name() const {
+        if (identifier.is_null()) {
+            return name;
+        }
+        DORIS_CHECK(has_identifier_name());
+        return identifier.get<TYPE_STRING>();
+    }
+    // Helper for BY_INDEX matching. BY_INDEX reuses the TYPE_INT identifier as the table-side file
+    // position, matching DuckDB's typed identifier plus mapper-mode interpretation.
+    int32_t get_identifier_position() const {
+        DORIS_CHECK(has_identifier_field_id());
+        return identifier.get<TYPE_INT>();
+    }
+
+    // Reader-local id for scan requests; uses the TYPE_INT identifier when local_id is -1.
+    int32_t file_local_id() const {
+        if (local_id != -1) {
+            return local_id;
+        }
+        return get_identifier_field_id();
+    }
+
+    std::string debug_string() const;
+};
+
+inline constexpr int ROW_POSITION_COLUMN_ID = -10001;
+inline constexpr const char* ROW_POSITION_COLUMN_NAME = "__file_row_position";
+inline constexpr int GLOBAL_ROWID_COLUMN_ID = -10002;
+
+inline ColumnDefinition row_position_column_definition() {
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(ROW_POSITION_COLUMN_ID);
+    field.local_id = ROW_POSITION_COLUMN_ID;
+    field.name = ROW_POSITION_COLUMN_NAME;
+    field.type = std::make_shared<DataTypeInt64>();
+    field.column_type = ColumnType::ROW_NUMBER;
+    return field;
+}
+
+inline ColumnDefinition global_rowid_column_definition() {
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_STRING>(BeConsts::GLOBAL_ROWID_COL);
+    field.local_id = GLOBAL_ROWID_COLUMN_ID;
+    field.name = BeConsts::GLOBAL_ROWID_COL;
+    field.type = std::make_shared<DataTypeString>();
+    field.column_type = ColumnType::GLOBAL_ROWID;
+    return field;
+}
+
+// Recursive file-local projection path.
+//
+// For a root entry in FileScanRequest::{predicate_columns, non_predicate_columns}, index is the
+// top-level file column id and column_id() is valid. For children, index is the file-local child id
+// under the parent node. This is the reader schema local id, not an Iceberg/Parquet field id, not a
+// table child id, and not a child output ordinal.
+//
+// project_all_children=true means the whole subtree under this node is needed. When false, children
+// lists the selected child paths. File readers can use this to avoid constructing readers for
+// unprojected nested children.
+struct LocalColumnIndex {
+    int32_t index = -1;
+    bool project_all_children = true;
+    std::vector<LocalColumnIndex> children {};
+
+    static LocalColumnIndex top_level(LocalColumnId column_id) {
+        return {.index = column_id.value()};
+    }
+
+    static LocalColumnIndex local(int32_t local_id) { return {.index = local_id}; }
+
+    static LocalColumnIndex partial_local(int32_t local_id) {
+        return {.index = local_id, .project_all_children = false};
+    }
+
+    LocalColumnId column_id() const { return LocalColumnId(index); }
+    int32_t local_id() const { return index; }
+    std::string debug_string() const;
+};
+
+inline bool is_full_projection(const LocalColumnIndex* projection) {
+    return projection == nullptr || projection->project_all_children;
+}
+
+inline bool is_partial_projection(const LocalColumnIndex* projection) {
+    return projection != nullptr && !projection->project_all_children;
+}
+
+inline const LocalColumnIndex* find_child_projection(const LocalColumnIndex* projection,
+                                                     int32_t local_id) {
+    if (!is_partial_projection(projection)) {
+        return nullptr;
+    }
+    const auto& candidates = projection->children;
+    const auto matches_id = [local_id](const LocalColumnIndex& c) { return c.index == local_id; };
+    const auto found = std::find_if(candidates.begin(), candidates.end(), matches_id);
+    return found != candidates.end() ? &*found : nullptr;
+}
+
+inline bool is_child_projected(const LocalColumnIndex* projection, int32_t local_id) {
+    return is_full_projection(projection) || find_child_projection(projection, local_id) != nullptr;
+}
+
+// Merge two projection trees that point to the same file-local node.
+//
+// A full projection dominates a partial projection. Two partial projections are merged by child id
+// and recursively union their child paths. The caller must only merge projections for the same
+// root/child node.
+inline Status merge_local_column_index(LocalColumnIndex* target, const LocalColumnIndex& source) {
+    DORIS_CHECK(target != nullptr);
+    DORIS_CHECK(target->index == source.index);
+    if (target->project_all_children) {
+        return Status::OK();
+    }
+    // A full source promotes the target to a full projection; explicit child paths are dropped.
+    if (source.project_all_children) {
+        target->children.clear();
+        target->project_all_children = true;
+        return Status::OK();
+    }
+    auto& paths = target->children;
+    for (const auto& added : source.children) {
+        const auto same_node = [&](const LocalColumnIndex& c) { return c.index == added.index; };
+        if (auto hit = std::find_if(paths.begin(), paths.end(), same_node); hit != paths.end()) {
+            RETURN_IF_ERROR(merge_local_column_index(&*hit, added));
+        } else {
+            paths.push_back(added);
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/column_mapper.cpp b/be/src/format_v2/column_mapper.cpp
new file mode 100644
index 00000000000000..e6a0e1a28e7422
--- /dev/null
+++ b/be/src/format_v2/column_mapper.cpp
@@ -0,0 +1,2029 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/column_mapper.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <sstream>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/data_type/convert_field_to_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/primitive_type.h"
+#include "exprs/runtime_filter_expr.h"
+#include "exprs/short_circuit_evaluation_expr.h"
+#include "exprs/vcase_expr.h"
+#include "exprs/vcast_expr.h"
+#include "exprs/vcondition_expr.h"
+#include "exprs/vectorized_fn_call.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vin_predicate.h"
+#include "exprs/vliteral.h"
+#include "format_v2/column_mapper_nested.h"
+#include "format_v2/expr/cast.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/schema_projection.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Exprs_types.h"
+
+namespace doris::format {
+
+namespace {
+
+// Debug label for a TableColumnMappingMode; a value not covered by the switch yields "UNKNOWN".
+std::string mapping_mode_to_string(TableColumnMappingMode mode) {
+    const char* label = "UNKNOWN";
+    switch (mode) {
+    case TableColumnMappingMode::BY_FIELD_ID:
+        label = "BY_FIELD_ID";
+        break;
+    case TableColumnMappingMode::BY_NAME:
+        label = "BY_NAME";
+        break;
+    case TableColumnMappingMode::BY_INDEX:
+        label = "BY_INDEX";
+        break;
+    }
+    return label;
+}
+
+// Case-insensitive test whether `name` matches `column`: its own name, its identifier name (when
+// it has one), or any entry of its name_mapping aliases.
+bool column_has_name(const ColumnDefinition& column, const std::string& name) {
+    // Lower-case the probe once; it is compared against every candidate below.
+    const auto lowered_name = to_lower(name);
+    if (to_lower(column.name) == lowered_name) {
+        return true;
+    }
+    if (column.has_identifier_name() && to_lower(column.get_identifier_name()) == lowered_name) {
+        return true;
+    }
+    return std::ranges::any_of(column.name_mapping, [&](const std::string& alias) {
+        return to_lower(alias) == lowered_name;
+    });
+}
+
+// True when any name of `lhs` (its name, identifier name, or a name_mapping alias) is accepted
+// by column_has_name() for `rhs`.
+bool column_names_match(const ColumnDefinition& lhs, const ColumnDefinition& rhs) {
+    const auto rhs_accepts = [&rhs](const std::string& candidate) {
+        return column_has_name(rhs, candidate);
+    };
+    return rhs_accepts(lhs.name) ||
+           (lhs.has_identifier_name() && rhs_accepts(lhs.get_identifier_name())) ||
+           std::ranges::any_of(lhs.name_mapping, rhs_accepts);
+}
+
+// Strategy interface for locating the file-schema column that corresponds to a table column.
+// The implementations in this file return a pointer to an element of `file_schema`, or nullptr
+// when no element matches.
+class ColumnMatcher {
+public:
+    virtual ~ColumnMatcher() = default;
+    virtual const ColumnDefinition* find(
+            const ColumnDefinition& table_column,
+            const std::vector<ColumnDefinition>& file_schema) const = 0;
+};
+
+// Matches by field id: returns the first file column whose identifier field id equals the table
+// column's, or nullptr when the table column carries no field id or nothing matches.
+class FieldIdMatcher final : public ColumnMatcher {
+public:
+    const ColumnDefinition* find(const ColumnDefinition& table_column,
+                                 const std::vector<ColumnDefinition>& file_schema) const override {
+        if (!table_column.has_identifier_field_id()) {
+            return nullptr;
+        }
+        const auto wanted_id = table_column.get_identifier_field_id();
+        for (const auto& candidate : file_schema) {
+            if (candidate.has_identifier_field_id() &&
+                candidate.get_identifier_field_id() == wanted_id) {
+                return &candidate;
+            }
+        }
+        return nullptr;
+    }
+};
+
+// Matches by name: returns the first file column for which column_names_match() accepts the
+// table column, or nullptr when there is none.
+class NameMatcher final : public ColumnMatcher {
+public:
+    const ColumnDefinition* find(const ColumnDefinition& table_column,
+                                 const std::vector<ColumnDefinition>& file_schema) const override {
+        for (const auto& candidate : file_schema) {
+            if (column_names_match(table_column, candidate)) {
+                return &candidate;
+            }
+        }
+        return nullptr;
+    }
+};
+
+// Matches by position: the table column's identifier position indexes directly into
+// `file_schema`; an out-of-range position yields nullptr.
+class PositionMatcher final : public ColumnMatcher {
+public:
+    const ColumnDefinition* find(const ColumnDefinition& table_column,
+                                 const std::vector<ColumnDefinition>& file_schema) const override {
+        // NOTE(review): the guard checks has_identifier_field_id() while the value is read via
+        // get_identifier_position(); presumably both rely on the same identifier representation.
+        // Confirm this is intended.
+        if (!table_column.has_identifier_field_id()) {
+            return nullptr;
+        }
+        const auto position = table_column.get_identifier_position();
+        const bool in_range =
+                !(position < 0) && static_cast<size_t>(position) < file_schema.size();
+        return in_range ? &file_schema[static_cast<size_t>(position)] : nullptr;
+    }
+};
+
+// Returns the shared matcher instance for `mode`. A value not covered by the switch falls back
+// to the field-id matcher.
+const ColumnMatcher& matcher_for_mode(TableColumnMappingMode mode) {
+    static const FieldIdMatcher field_id_matcher;
+    static const NameMatcher name_matcher;
+    static const PositionMatcher position_matcher;
+    const ColumnMatcher* selected = &field_id_matcher;
+    switch (mode) {
+    case TableColumnMappingMode::BY_FIELD_ID:
+        break;
+    case TableColumnMappingMode::BY_NAME:
+        selected = &name_matcher;
+        break;
+    case TableColumnMappingMode::BY_INDEX:
+        selected = &position_matcher;
+        break;
+    }
+    return *selected;
+}
+
+// Debug label for a TableVirtualColumnType; a value not covered by the switch yields "UNKNOWN".
+std::string virtual_column_type_to_string(TableVirtualColumnType type) {
+    const char* label = "UNKNOWN";
+    switch (type) {
+    case TableVirtualColumnType::INVALID:
+        label = "INVALID";
+        break;
+    case TableVirtualColumnType::ROW_ID:
+        label = "ROW_ID";
+        break;
+    case TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER:
+        label = "LAST_UPDATED_SEQUENCE_NUMBER";
+        break;
+    case TableVirtualColumnType::ICEBERG_ROWID:
+        label = "ICEBERG_ROWID";
+        break;
+    }
+    return label;
+}
+
+// Debug label for a FilterConversionType; a value not covered by the switch yields "UNKNOWN".
+std::string filter_conversion_type_to_string(FilterConversionType type) {
+    const char* label = "UNKNOWN";
+    switch (type) {
+    case FilterConversionType::COPY_DIRECTLY:
+        label = "COPY_DIRECTLY";
+        break;
+    case FilterConversionType::CAST_FILTER:
+        label = "CAST_FILTER";
+        break;
+    case FilterConversionType::READER_EXPRESSION:
+        label = "READER_EXPRESSION";
+        break;
+    case FilterConversionType::FINALIZE_ONLY:
+        label = "FINALIZE_ONLY";
+        break;
+    case FilterConversionType::CONSTANT:
+        label = "CONSTANT";
+        break;
+    }
+    return label;
+}
+
+// Name of `type` for debug output, or "null" when the pointer is empty.
+std::string data_type_debug_string(const DataTypePtr& type) {
+    if (type == nullptr) {
+        return "null";
+    }
+    return type->get_name();
+}
+
+// Renders `field` as "Field{type=<type>, value=<value>}". NULL, INT, BIGINT and STRING values
+// are streamed directly; every other type goes through Field::to_debug_string(0).
+std::string field_debug_string(const Field& field) {
+    const auto field_type = field.get_type();
+    std::ostringstream out;
+    out << "Field{type=" << type_to_string(field_type) << ", value=";
+    if (field_type == TYPE_NULL) {
+        out << "null";
+    } else if (field_type == TYPE_INT) {
+        out << field.get<TYPE_INT>();
+    } else if (field_type == TYPE_BIGINT) {
+        out << field.get<TYPE_BIGINT>();
+    } else if (field_type == TYPE_STRING) {
+        out << field.get<TYPE_STRING>();
+    } else {
+        out << field.to_debug_string(0);
+    }
+    out << "}";
+    return out.str();
+}
+
+// Formats `values` as "[a, b, c]", where each element is rendered by streaming the result of
+// `formatter(element)`. An empty vector yields "[]".
+template <typename T, typename Formatter>
+std::string join_debug_strings(const std::vector<T>& values, Formatter formatter) {
+    std::ostringstream out;
+    out << "[";
+    // Empty before the first element, ", " before every later one.
+    const char* separator = "";
+    for (const auto& value : values) {
+        out << separator << formatter(value);
+        separator = ", ";
+    }
+    out << "]";
+    return out.str();
+}
+
+} // namespace
+
+// Looks up the partition value for `table_column`, trying in order: the column name, the
+// identifier name (when present), then each name_mapping alias. Lookups use the exact key.
+// Returns a pointer into `partition_values`, or nullptr when no key is present.
+const Field* find_partition_value(const ColumnDefinition& table_column,
+                                  const std::map<std::string, Field>& partition_values) {
+    const auto lookup = [&partition_values](const std::string& key) -> const Field* {
+        if (const auto hit = partition_values.find(key); hit != partition_values.end()) {
+            return &hit->second;
+        }
+        return nullptr;
+    };
+    const Field* found = lookup(table_column.name);
+    if (found == nullptr && table_column.has_identifier_name()) {
+        found = lookup(table_column.get_identifier_name());
+    }
+    if (found == nullptr) {
+        for (const auto& alias : table_column.name_mapping) {
+            found = lookup(alias);
+            if (found != nullptr) {
+                break;
+            }
+        }
+    }
+    return found;
+}
+
+// Per-slot data used when a table-level slot reference is rewritten into a file-level one.
+struct FileSlotRewriteInfo {
+    // Column position handed to the rewritten VSlotRef (see create_file_slot_ref).
+    size_t block_position = 0;
+    // Data type given to the rewritten file slot; literals are converted to this type.
+    DataTypePtr file_type;
+    // Table-side type of the slot; compared against cast and literal types before rewriting.
+    DataTypePtr table_type;
+    // Column name given to the rewritten file slot.
+    std::string file_column_name;
+};
+
+// Collects the expression nodes created while rewriting a table expression so that they can be
+// prepared afterwards, in the order they were created.
+struct RewriteContext {
+    // Forwarded to VExpr::prepare(); must be non-null when any created node is a Cast.
+    RuntimeState* runtime_state = nullptr;
+    // Nodes registered by the rewrite helpers, in insertion order.
+    std::vector<VExprSPtr> created_exprs {};
+
+    // Registers a newly created node for a later prepare_created_exprs() call.
+    void add_created_expr(VExprSPtr expr) { created_exprs.push_back(std::move(expr)); }
+
+    // Prepares every registered node with a default-constructed RowDescriptor. Returns
+    // InvalidArgument when a Cast node is found while runtime_state is null; otherwise
+    // propagates any error from prepare() through RETURN_IF_ERROR.
+    Status prepare_created_exprs(VExprContext* context) const {
+        DORIS_CHECK(context != nullptr);
+        RowDescriptor row_desc;
+        for (const auto& expr : created_exprs) {
+            if (dynamic_cast<const Cast*>(expr.get()) != nullptr && runtime_state == nullptr) {
+                return Status::InvalidArgument(
+                        "RuntimeState is required to prepare rewritten cast expression {}",
+                        expr->expr_name());
+            }
+            RETURN_IF_ERROR(expr->prepare(runtime_state, row_desc, context));
+        }
+        return Status::OK();
+    }
+};
+
+// Builds a VSlotRef that keeps the slot id of `slot_ref` but takes its column position, type and
+// column name from `rewrite_info`, and registers it with `rewrite_context`.
+static VExprSPtr create_file_slot_ref(const VSlotRef& slot_ref,
+                                      const FileSlotRewriteInfo& rewrite_info,
+                                      RewriteContext* rewrite_context) {
+    const int file_column_position = cast_set<int>(rewrite_info.block_position);
+    auto file_slot = VSlotRef::create_shared(slot_ref.slot_id(), file_column_position, -1,
+                                             rewrite_info.file_type,
+                                             rewrite_info.file_column_name);
+    rewrite_context->add_created_expr(file_slot);
+    return file_slot;
+}
+
+// True when `expr` points to a format_v2 `Cast` node (checked with dynamic_cast).
+static bool is_cast_expr(const VExprSPtr& expr) {
+    const auto* as_cast = dynamic_cast<const Cast*>(expr.get());
+    return as_cast != nullptr;
+}
+
+// True when `expr` is a non-null two-child BINARY_PRED / NULL_AWARE_BINARY_PRED node whose
+// opcode is one of EQ, EQ_FOR_NULL, NE, GE, GT, LE, LT.
+static bool is_binary_comparison_predicate(const VExprSPtr& expr) {
+    if (expr == nullptr || expr->get_num_children() != 2) {
+        return false;
+    }
+    const auto node_type = expr->node_type();
+    if (node_type != TExprNodeType::BINARY_PRED &&
+        node_type != TExprNodeType::NULL_AWARE_BINARY_PRED) {
+        return false;
+    }
+    const auto opcode = expr->op();
+    return opcode == TExprOpcode::EQ || opcode == TExprOpcode::EQ_FOR_NULL ||
+           opcode == TExprOpcode::NE || opcode == TExprOpcode::GE ||
+           opcode == TExprOpcode::GT || opcode == TExprOpcode::LE ||
+           opcode == TExprOpcode::LT;
+}
+
+// Renders the options as "TableColumnMapperOptions{mode=<MODE>}".
+std::string TableColumnMapperOptions::debug_string() const {
+    std::string rendered = "TableColumnMapperOptions{mode=";
+    rendered += mapping_mode_to_string(mode);
+    rendered += "}";
+    return rendered;
+}
+
+// Renders every field of the definition, recursing into `children`; pointer-valued members are
+// reported only as presence flags.
+std::string ColumnDefinition::debug_string() const {
+    const auto aliases =
+            join_debug_strings(name_mapping, [](const std::string& alias) { return alias; });
+    const auto nested = join_debug_strings(
+            children, [](const ColumnDefinition& child) { return child.debug_string(); });
+    std::ostringstream out;
+    out << "ColumnDefinition{name=" << name;
+    out << ", identifier=" << field_debug_string(identifier);
+    out << ", name_mapping=" << aliases;
+    out << ", local_id=" << local_id;
+    out << ", type=" << data_type_debug_string(type);
+    out << ", children=" << nested;
+    out << ", has_default_expr=" << (default_expr != nullptr);
+    out << ", is_partition_key=" << is_partition_key << "}";
+    return out.str();
+}
+
+// Renders the projection node and, recursively, its child projections.
+std::string LocalColumnIndex::debug_string() const {
+    const auto nested = join_debug_strings(
+            children, [](const LocalColumnIndex& child) { return child.debug_string(); });
+    std::ostringstream out;
+    out << "LocalColumnIndex{index=" << index;
+    out << ", project_all_children=" << project_all_children;
+    out << ", children=" << nested << "}";
+    return out.str();
+}
+
+// Renders the mapping for debugging. Empty optionals print as "null"; pointer members are
+// reported as presence flags; nested definitions and child mappings are rendered recursively.
+std::string ColumnMapping::debug_string() const {
+    std::ostringstream out;
+    // Streams the contained value, or "null" for an empty optional.
+    const auto write_optional = [&out](const auto& maybe_value) {
+        if (maybe_value.has_value()) {
+            out << *maybe_value;
+        } else {
+            out << "null";
+        }
+    };
+    const auto describe_definition = [](const ColumnDefinition& child) {
+        return child.debug_string();
+    };
+    const auto describe_mapping = [](const ColumnMapping& child) { return child.debug_string(); };
+
+    out << "ColumnMapping{global_index=" << global_index
+        << ", table_column_name=" << table_column_name << ", file_local_id=";
+    write_optional(file_local_id);
+    out << ", constant_index=";
+    write_optional(constant_index);
+    out << ", file_column_name=" << file_column_name;
+    out << ", original_file_type=" << data_type_debug_string(original_file_type);
+    out << ", original_file_children="
+        << join_debug_strings(original_file_children, describe_definition);
+    out << ", file_type=" << data_type_debug_string(file_type);
+    out << ", table_type=" << data_type_debug_string(table_type);
+    out << ", has_projection=" << (projection != nullptr);
+    out << ", child_mappings=" << join_debug_strings(child_mappings, describe_mapping);
+    out << ", is_trivial=" << is_trivial;
+    out << ", is_constant=" << constant_index.has_value();
+    out << ", filter_conversion=" << filter_conversion_type_to_string(filter_conversion);
+    out << ", virtual_column_type=" << virtual_column_type_to_string(virtual_column_type);
+    out << ", has_default_expr=" << (default_expr != nullptr) << "}";
+    return out.str();
+}
+
+// Renders the mapper options, the visible and hidden mappings, and the constant-map size.
+std::string TableColumnMapper::debug_string() const {
+    const auto describe = [](const ColumnMapping& mapping) { return mapping.debug_string(); };
+    std::ostringstream out;
+    out << "TableColumnMapper{options=" << _options.debug_string();
+    out << ", mappings=" << join_debug_strings(_mappings, describe);
+    out << ", hidden_mappings=" << join_debug_strings(_hidden_mappings, describe);
+    out << ", constant_count=" << _constant_map.size() << "}";
+    return out.str();
+}
+
+// Resolves `expr` to the rewrite info of the slot it refers to. `expr` may be a slot ref or a
+// single-child Cast over a slot ref; in the Cast case the cast's result type must equal the
+// slot's table type. On success the slot is optionally reported through `slot_ref`. Returns
+// nullptr when `expr` is null, is not such a shape, or the slot has no rewrite entry.
+static const FileSlotRewriteInfo* find_slot_rewrite_info(
+        const VExprSPtr& expr,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        const VSlotRef** slot_ref) {
+    if (expr == nullptr) {
+        return nullptr;
+    }
+    const bool input_is_cast = is_cast_expr(expr) && expr->get_num_children() == 1;
+    // Look through the cast, if any, to reach the candidate slot.
+    const VExprSPtr slot_expr = input_is_cast ? expr->children()[0] : expr;
+    if (!slot_expr->is_slot_ref()) {
+        return nullptr;
+    }
+    const auto* candidate_slot_ref = assert_cast<const VSlotRef*>(slot_expr.get());
+    const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*candidate_slot_ref));
+    if (rewrite_it == global_to_file_slot.end()) {
+        return nullptr;
+    }
+    const FileSlotRewriteInfo& rewrite_info = rewrite_it->second;
+    if (input_is_cast && !expr->data_type()->equals(*rewrite_info.table_type)) {
+        return nullptr;
+    }
+    if (slot_ref != nullptr) {
+        *slot_ref = candidate_slot_ref;
+    }
+    return &rewrite_info;
+}
+
+// True for COPY_DIRECTLY, CAST_FILTER and READER_EXPRESSION; false for FINALIZE_ONLY, CONSTANT
+// and any value not covered by the switch.
+static bool filter_conversion_has_local_source(FilterConversionType conversion) {
+    bool has_local_source = false;
+    switch (conversion) {
+    case FilterConversionType::COPY_DIRECTLY:
+    case FilterConversionType::CAST_FILTER:
+    case FilterConversionType::READER_EXPRESSION:
+        has_local_source = true;
+        break;
+    case FilterConversionType::FINALIZE_ONLY:
+    case FilterConversionType::CONSTANT:
+        break;
+    }
+    return has_local_source;
+}
+
+// True only for COPY_DIRECTLY; every other conversion type (and any value not covered by the
+// switch) yields false.
+static bool column_predicate_can_use_local_source(FilterConversionType conversion) {
+    bool usable = false;
+    switch (conversion) {
+    case FilterConversionType::COPY_DIRECTLY:
+        usable = true;
+        break;
+    case FilterConversionType::CAST_FILTER:
+    case FilterConversionType::READER_EXPRESSION:
+    case FilterConversionType::FINALIZE_ONLY:
+    case FilterConversionType::CONSTANT:
+        break;
+    }
+    return usable;
+}
+
+// True when every global index referenced by `table_filter` has an entry in `filter_entries`
+// and that entry reports is_local(). A filter without global indices yields true.
+static bool table_filter_has_only_local_entries(
+        const TableFilter& table_filter, const std::map<GlobalIndex, FilterEntry>& filter_entries) {
+    return std::ranges::all_of(table_filter.global_indices, [&](const auto& global_index) {
+        const auto entry_it = filter_entries.find(global_index);
+        return entry_it != filter_entries.end() && entry_it->second.is_local();
+    });
+}
+
+// Returns the literal behind `expr`: `expr` itself when it is a literal, or the child of a
+// single-child Cast when that child is a literal whose type equals `table_type`. Any other shape
+// (including a null `expr`) yields nullptr.
+static VExprSPtr unwrap_literal_for_file_cast(const VExprSPtr& expr,
+                                              const DataTypePtr& table_type) {
+    if (expr == nullptr) {
+        return nullptr;
+    }
+    if (expr->is_literal()) {
+        return expr;
+    }
+    if (!is_cast_expr(expr) || expr->get_num_children() != 1) {
+        return nullptr;
+    }
+    const VExprSPtr cast_input = expr->children()[0];
+    if (!cast_input->is_literal()) {
+        return nullptr;
+    }
+    if (!cast_input->data_type()->equals(*table_type)) {
+        return nullptr;
+    }
+    return cast_input;
+}
+
+// Extracts the value at row 0 of the literal's column as a Field. `literal_expr` must be a
+// VLiteral; both conditions are enforced with DORIS_CHECK.
+static Field literal_field_from_expr(const VExpr& literal_expr) {
+    DORIS_CHECK(literal_expr.is_literal());
+    const auto* as_literal = dynamic_cast<const VLiteral*>(&literal_expr);
+    DORIS_CHECK(as_literal != nullptr);
+    Field value;
+    as_literal->get_column_ptr()->get(0, value);
+    return value;
+}
+
+// Table filter localization clones an already-prepared table expr and then rewrites it to file
+// slots. Only split-local literals and BE cast nodes need table-reader-specific clone behavior;
+// plain slot refs and literals use their own VExpr::clone_node().
+//
+// For any other node type `*cloned_expr` is left untouched and OK is returned.
+// NOTE(review): presumably deep_clone() treats an unset `*cloned_expr` as "use the default
+// clone" -- confirm against VExpr::deep_clone.
+static Status clone_table_expr_node(const VExpr& expr, VExprSPtr* cloned_expr) {
+    DORIS_CHECK(cloned_expr != nullptr);
+    if (const auto* split_literal = dynamic_cast<const SplitLocalFileLiteral*>(&expr)) {
+        // Rebuild the split-local literal from its current value plus its original table
+        // type and field.
+        *cloned_expr = std::make_shared<SplitLocalFileLiteral>(
+                split_literal->data_type(), literal_field_from_expr(expr),
+                split_literal->original_type(), split_literal->original_field());
+    } else if (const auto* vcast_expr = dynamic_cast<const VCastExpr*>(&expr);
+               vcast_expr != nullptr && vcast_expr->node_type() == TExprNodeType::CAST_EXPR) {
+        // Replace a VCastExpr with a format_v2 Cast node targeting the same result type.
+        *cloned_expr = Cast::create_shared(vcast_expr->data_type());
+    }
+    return Status::OK();
+}
+
+// Deep-clones `expr` into `*cloned_expr`, using clone_table_expr_node as the per-node hook.
+// A null `expr` produces a null clone and OK.
+Status clone_table_expr_tree(const VExprSPtr& expr, VExprSPtr* cloned_expr) {
+    DORIS_CHECK(cloned_expr != nullptr);
+    if (expr != nullptr) {
+        return expr->deep_clone(cloned_expr, clone_table_expr_node);
+    }
+    *cloned_expr = nullptr;
+    return Status::OK();
+}
+
+// Returns the table-typed form of `literal_expr`. A SplitLocalFileLiteral is converted back into
+// a fresh VLiteral built from its original type and field (registered with `rewrite_context`
+// when one is given); any other literal is returned unchanged.
+static VExprSPtr original_table_literal(const VExprSPtr& literal_expr,
+                                        RewriteContext* rewrite_context = nullptr) {
+    DORIS_CHECK(literal_expr != nullptr);
+    DORIS_CHECK(literal_expr->is_literal());
+    const auto* split_literal = dynamic_cast<const SplitLocalFileLiteral*>(literal_expr.get());
+    if (split_literal != nullptr) {
+        auto restored = VLiteral::create_shared(split_literal->original_type(),
+                                                split_literal->original_field());
+        if (rewrite_context != nullptr) {
+            rewrite_context->add_created_expr(restored);
+        }
+        return restored;
+    }
+    return literal_expr;
+}
+
+// Builds a ColumnDefinition from a slot ref: the slot's column name becomes both the name and a
+// string identifier, and the slot's data type becomes the column type.
+static ColumnDefinition hidden_column_from_slot_ref(const VSlotRef& slot_ref) {
+    ColumnDefinition hidden;
+    hidden.name = slot_ref.column_name();
+    hidden.identifier = Field::create_field<TYPE_STRING>(hidden.name);
+    hidden.type = slot_ref.data_type();
+    return hidden;
+}
+
+// Walks `expr` and records one ColumnDefinition per distinct slot global index it references.
+// An index already present in `columns` is kept as is (try_emplace).
+static void collect_top_level_slot_columns(const VExprSPtr& expr,
+                                           std::map<GlobalIndex, ColumnDefinition>* columns) {
+    DORIS_CHECK(columns != nullptr);
+    if (expr == nullptr) {
+        return;
+    }
+    if (!expr->is_slot_ref()) {
+        for (const auto& child : expr->children()) {
+            collect_top_level_slot_columns(child, columns);
+        }
+        return;
+    }
+    const auto& slot = *assert_cast<const VSlotRef*>(expr.get());
+    columns->try_emplace(slot_ref_global_index(slot), hidden_column_from_slot_ref(slot));
+}
+
+// Converts a literal to `rewrite_info.file_type` so it can be compared against a file slot.
+//
+// Returns:
+// - the original table literal when its type already equals the file type;
+// - a SplitLocalFileLiteral holding both the converted and the original value on success;
+// - nullptr when the conversion throws, yields a null field, or yields a field whose primitive
+//   type differs from the non-nullable file type.
+static VExprSPtr rewrite_literal_to_file_type(const VExprSPtr& literal_expr,
+                                              const FileSlotRewriteInfo& rewrite_info,
+                                              RewriteContext* rewrite_context) {
+    DORIS_CHECK(literal_expr != nullptr);
+    DORIS_CHECK(literal_expr->is_literal());
+    // Always convert from the table-typed value, even if the input was already localized.
+    const auto original_literal = original_table_literal(literal_expr, rewrite_context);
+    const Field original_field = literal_field(original_literal);
+    if (rewrite_info.file_type->equals(*original_literal->data_type())) {
+        return original_literal;
+    }
+    Field file_field;
+    try {
+        convert_field_to_type(original_field, *rewrite_info.file_type, &file_field,
+                              original_literal->data_type().get());
+    } catch (const Exception&) {
+        // A failed conversion means the literal cannot be expressed in the file type.
+        return nullptr;
+    }
+    if (file_field.is_null()) {
+        return nullptr;
+    }
+    if (file_field.get_type() != remove_nullable(rewrite_info.file_type)->get_primitive_type()) {
+        return nullptr;
+    }
+    auto literal = std::make_shared<SplitLocalFileLiteral>(
+            rewrite_info.file_type, file_field, original_literal->data_type(), original_field);
+    rewrite_context->add_created_expr(literal);
+    return literal;
+}
+
+// Rewrites a binary comparison of the form `slot <op> literal` (either operand order, each side
+// optionally wrapped in a cast accepted by the helpers) in place so that it compares the file
+// slot against a literal converted to the file type.
+//
+// Returns true when both children were replaced. Returns false when `expr` does not have this
+// shape or the literal cannot be converted; in the latter case the literal child is replaced by
+// its original table literal and the slot child is left unchanged.
+static bool rewrite_binary_slot_literal_predicate(
+        const VExprSPtr& expr,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        RewriteContext* rewrite_context) {
+    if (!is_binary_comparison_predicate(expr)) {
+        return false;
+    }
+    // Work on a copy of the child list; it is written back with set_children().
+    auto children = expr->children();
+    const VSlotRef* slot_ref = nullptr;
+    const FileSlotRewriteInfo* rewrite_info =
+            find_slot_rewrite_info(children[0], global_to_file_slot, &slot_ref);
+    int slot_child_idx = 0;
+    int literal_child_idx = 1;
+    if (rewrite_info == nullptr) {
+        // The slot was not on the left; try `literal <op> slot`.
+        rewrite_info = find_slot_rewrite_info(children[1], global_to_file_slot, &slot_ref);
+        slot_child_idx = 1;
+        literal_child_idx = 0;
+    }
+    if (rewrite_info == nullptr || slot_ref == nullptr) {
+        return false;
+    }
+    auto literal_expr =
+            unwrap_literal_for_file_cast(children[literal_child_idx], rewrite_info->table_type);
+    if (literal_expr == nullptr) {
+        return false;
+    }
+
+    auto rewritten_literal =
+            rewrite_literal_to_file_type(literal_expr, *rewrite_info, rewrite_context);
+    if (rewritten_literal == nullptr) {
+        // Conversion failed: make sure the predicate holds the table-typed literal again.
+        children[literal_child_idx] = original_table_literal(literal_expr, rewrite_context);
+        expr->set_children(std::move(children));
+        return false;
+    }
+
+    children[slot_child_idx] = create_file_slot_ref(*slot_ref, *rewrite_info, rewrite_context);
+    children[literal_child_idx] = std::move(rewritten_literal);
+    expr->set_children(std::move(children));
+    return true;
+}
+
+// Rewrites an IN predicate `slot IN (lit, ...)` in place so that the slot is the file slot and
+// every list element is a literal converted to the file type.
+//
+// Returns true only when the first child resolves to a rewritable slot and every remaining child
+// is a convertible literal. If any element is not a literal, nothing is changed. If an element
+// is a literal but cannot be converted, all literal elements are restored to their original
+// table literals, the slot child is left unchanged, and false is returned.
+static bool rewrite_in_slot_literal_predicate(
+        const VExprSPtr& expr,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        RewriteContext* rewrite_context) {
+    if (expr->node_type() != TExprNodeType::IN_PRED || expr->get_num_children() < 2) {
+        return false;
+    }
+    // Work on a copy of the child list; it is written back with set_children().
+    auto children = expr->children();
+    const VSlotRef* slot_ref = nullptr;
+    const FileSlotRewriteInfo* rewrite_info =
+            find_slot_rewrite_info(children[0], global_to_file_slot, &slot_ref);
+    if (rewrite_info == nullptr || slot_ref == nullptr) {
+        return false;
+    }
+
+    // Convert into a side buffer first so a late failure does not leave a half-rewritten list.
+    VExprSPtrs rewritten_literals;
+    rewritten_literals.reserve(children.size() - 1);
+    for (size_t child_idx = 1; child_idx < children.size(); ++child_idx) {
+        auto literal_expr =
+                unwrap_literal_for_file_cast(children[child_idx], rewrite_info->table_type);
+        if (literal_expr == nullptr) {
+            return false;
+        }
+        auto rewritten_literal =
+                rewrite_literal_to_file_type(literal_expr, *rewrite_info, rewrite_context);
+        if (rewritten_literal == nullptr) {
+            // Conversion failed: put table-typed literals back for every literal element.
+            for (size_t restore_idx = 1; restore_idx < children.size(); ++restore_idx) {
+                auto restore_literal = unwrap_literal_for_file_cast(children[restore_idx],
+                                                                    rewrite_info->table_type);
+                if (restore_literal != nullptr) {
+                    children[restore_idx] =
+                            original_table_literal(restore_literal, rewrite_context);
+                }
+            }
+            expr->set_children(std::move(children));
+            return false;
+        }
+        rewritten_literals.push_back(std::move(rewritten_literal));
+    }
+
+    children[0] = create_file_slot_ref(*slot_ref, *rewrite_info, rewrite_context);
+    for (size_t literal_idx = 0; literal_idx < rewritten_literals.size(); ++literal_idx) {
+        children[literal_idx + 1] = std::move(rewritten_literals[literal_idx]);
+    }
+    expr->set_children(std::move(children));
+    return true;
+}
+
+// Creates a string VLiteral holding `file_child_name` and registers it with `rewrite_context`.
+static VExprSPtr create_file_struct_child_name_literal(const std::string& file_child_name,
+                                                       RewriteContext* rewrite_context) {
+    auto name_field = Field::create_field<TYPE_STRING>(file_child_name);
+    auto name_literal =
+            VLiteral::create_shared(std::make_shared<DataTypeString>(), std::move(name_field));
+    rewrite_context->add_created_expr(name_literal);
+    return name_literal;
+}
+
+// True when the file and table types still differ after removing nullability and at least one
+// of the two non-nullable types is a complex type. Null or equal types yield false.
+static bool needs_complex_file_slot_cast(const DataTypePtr& file_type,
+                                         const DataTypePtr& table_type) {
+    if (file_type == nullptr || table_type == nullptr) {
+        return false;
+    }
+    if (file_type->equals(*table_type)) {
+        return false;
+    }
+    const auto file_inner = remove_nullable(file_type);
+    const auto table_inner = remove_nullable(table_type);
+    if (file_inner->equals(*table_inner)) {
+        return false;
+    }
+    if (is_complex_type(file_inner->get_primitive_type())) {
+        return true;
+    }
+    return is_complex_type(table_inner->get_primitive_type());
+}
+
+// Appends the struct-element expressions of `expr` to `chain`, innermost (closest to the slot)
+// first. Returns false when `expr` is not a struct-element expression or when the chain is not
+// rooted directly at a slot ref.
+static bool collect_struct_element_chain(const VExprSPtr& expr, std::vector<VExprSPtr>* chain) {
+    DORIS_CHECK(chain != nullptr);
+    if (!is_struct_element_expr(expr)) {
+        return false;
+    }
+    const auto& parent = expr->children()[0];
+    const bool parent_is_struct_element = is_struct_element_expr(parent);
+    if (parent_is_struct_element && !collect_struct_element_chain(parent, chain)) {
+        return false;
+    }
+    if (!parent_is_struct_element && !parent->is_slot_ref()) {
+        // Only struct child chains rooted directly at a top-level slot are localized, for example
+        // `element_at(s, 'a')` or `element_at(element_at(s, 'a'), 'b')`.
+        //
+        // Computed complex parents such as
+        // `element_at(element_at(map_values(m), 1), 'full_name')` are not localized. The
+        // intermediate map/array result has already been reshaped by scan projection and may
+        // have a different child order from the table expression. Partially rewriting that
+        // expression against the file block can silently evaluate the wrong struct child and
+        // filter out valid rows. Those predicates must remain as table-level conjuncts and be
+        // evaluated after TableReader materialization.
+        return false;
+    }
+    chain->push_back(expr);
+    return true;
+}
+
+// Rewrites a slot-rooted chain of struct-element expressions in place so that the root slot, the
+// child-name selectors and the intermediate result types are all expressed in file schema terms.
+//
+// Returns false without modifying `expr` when the path cannot be resolved against `mappings`,
+// the chain is not slot-rooted, the chain length disagrees with the resolved names/types, or
+// the root slot has no entry in `global_to_file_slot`. Returns true after the rewrite.
+static bool rewrite_struct_element_path_to_file_expr(
+        const VExprSPtr& expr, const std::vector<ColumnMapping>& mappings,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        RewriteContext* rewrite_context) {
+    ResolvedNestedStructPath resolved;
+    if (!resolve_nested_struct_expr_for_file(expr, mappings, &resolved)) {
+        return false;
+    }
+
+    // The chain is ordered innermost first; it must line up one-to-one with the resolved
+    // file child names and types.
+    std::vector<VExprSPtr> struct_element_chain;
+    if (!collect_struct_element_chain(expr, &struct_element_chain) ||
+        struct_element_chain.size() != resolved.file_child_names.size() ||
+        struct_element_chain.size() != resolved.file_child_types.size()) {
+        return false;
+    }
+
+    auto root_children = struct_element_chain.front()->children();
+    if (!root_children[0]->is_slot_ref()) {
+        return false;
+    }
+    const auto* slot_ref = assert_cast<const VSlotRef*>(root_children[0].get());
+    const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref));
+    if (rewrite_it == global_to_file_slot.end()) {
+        return false;
+    }
+
+    // File-local conjuncts are prepared against the file-reader Block, so both the root slot and
+    // every struct selector must be expressed in file schema terms. For a renamed Iceberg field,
+    // keeping the table selector would prepare `element_at(file_struct<rename>, 'renamed')` and
+    // fail before any rows are read. Rewrite the whole chain while ColumnMapping still preserves
+    // the table-to-file relationship. Example:
+    //   table filter: element_at(element_at(s, 'renamed_parent'), 'renamed_leaf')
+    //   old file:     s<parent<leaf>>
+    //   file filter:  element_at(element_at(s, 'parent'), 'leaf')
+    root_children[0] = create_file_slot_ref(*slot_ref, rewrite_it->second, rewrite_context);
+    struct_element_chain.front()->set_children(std::move(root_children));
+    for (size_t idx = 0; idx < struct_element_chain.size(); ++idx) {
+        auto children = struct_element_chain[idx]->children();
+        children[1] = create_file_struct_child_name_literal(resolved.file_child_names[idx],
+                                                            rewrite_context);
+        struct_element_chain[idx]->set_children(std::move(children));
+        // The selector name and the expression return type must be moved to file schema together.
+        // Example:
+        //   table filter: element_at(element_at(s, 'new_a'), 'new_aa') = 50
+        //   old file:     s.new_a STRUCT<aa, bb>
+        //   file filter:  element_at(element_at(s, 'new_a'), 'aa') = 50
+        //
+        // If the inner element_at keeps the table return type STRUCT<new_aa, bb>, preparing the
+        // outer element_at(..., 'aa') fails before scanning because `aa` is not a table field.
+        struct_element_chain[idx]->data_type() = resolved.file_child_types[idx];
+    }
+    return true;
+}
+
+static VExprSPtr rewrite_table_expr_to_file_expr(
+        const VExprSPtr& expr,
+        const std::map<GlobalIndex, FileSlotRewriteInfo>& global_to_file_slot,
+        const std::vector<ColumnMapping>& filter_mappings, RewriteContext* rewrite_context,
+        bool* can_localize) {  // out: cleared when any node cannot be rewritten file-locally
+    if (expr == nullptr) {
+        return nullptr;  // nothing to rewrite
+    }
+    DORIS_CHECK(rewrite_context != nullptr);
+    DORIS_CHECK(can_localize != nullptr);
+    if (auto* runtime_filter = dynamic_cast<RuntimeFilterExpr*>(expr.get());
+        runtime_filter != nullptr) {
+        auto impl = runtime_filter->get_impl();  // the rewrite targets this impl tree
+        if (impl == nullptr) {
+            *can_localize = false;  // no impl tree to rewrite
+            return expr;
+        }
+        auto localized_impl = rewrite_table_expr_to_file_expr(
+                impl, global_to_file_slot, filter_mappings, rewrite_context, can_localize);
+        if (!*can_localize) {
+            return expr;  // flag is false: do not install the rewritten impl
+        }
+        runtime_filter->set_impl(std::move(localized_impl));
+        return expr;
+    }
+    if (rewrite_binary_slot_literal_predicate(expr, global_to_file_slot, rewrite_context)) {
+        return expr;
+    }
+    if (rewrite_in_slot_literal_predicate(expr, global_to_file_slot, rewrite_context)) {
+        return expr;
+    }
+    if (is_struct_element_expr(expr)) {
+        if (!rewrite_struct_element_path_to_file_expr(expr, filter_mappings, global_to_file_slot,
+                                                      rewrite_context)) {
+            // The scanner still evaluates the original table-level conjunct after TableReader
+            // finalizes the output block. Skipping an unlocalizable file conjunct is therefore
+            // safer than preparing a partially rewritten expression against the wrong struct
+            // layout. In particular, do not generate file-local conjuncts for computed complex
+            // parents such as `element_at(element_at(map_values(m), 1), 'field')`; only direct
+            // slot-rooted struct chains are supported here.
+            *can_localize = false;
+        }
+        return expr;  // returned in both the rewritten and the unlocalizable case
+    }
+    if (expr->is_slot_ref()) {
+        const auto* slot_ref = assert_cast<const VSlotRef*>(expr.get());
+        const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref));
+        if (rewrite_it != global_to_file_slot.end()) {
+            const auto& rewrite_info = rewrite_it->second;
+            auto file_slot = create_file_slot_ref(*slot_ref, rewrite_info, rewrite_context);
+            if (rewrite_info.file_type->equals(*rewrite_info.table_type)) {
+                return file_slot;  // file and table types match: no cast needed
+            }
+            if (needs_complex_file_slot_cast(rewrite_info.file_type, rewrite_info.table_type)) {
+                // Generic file-local expressions cannot safely cast an evolved complex file slot
+                // back to the table type. Example:
+                //
+                //   table filter: ARRAY_CONTAINS(MAP_KEYS(m), 'person5')
+                //   old file:     m MAP<STRING, STRUCT<name, age>>
+                //   table:        m MAP<STRING, STRUCT<age, full_name, gender>>
+                //
+                // Although MAP_KEYS only reads the key column, wrapping the file slot as
+                // `CAST(file_m AS table_m)` forces the value struct cast first and fails because
+                // the old and new value structs have different fields. Keep such filters at the
+                // table level, where TableReader materializes the evolved complex value before
+                // Scanner evaluates the original conjunct. Direct slot-rooted struct child paths
+                // are handled by rewrite_struct_element_path_to_file_expr() above.
+                *can_localize = false;
+                return expr;
+            }
+            auto cast_expr = Cast::create_shared(rewrite_info.table_type);
+            cast_expr->add_child(std::move(file_slot));
+            rewrite_context->add_created_expr(cast_expr);
+            return cast_expr;  // Cast(file_slot AS table_type)
+        }
+        return expr;  // slot has no file rewrite entry: keep it unchanged
+    }
+    // The input is a split-local cloned tree. A previous split-local clone may already have
+    // inserted Cast(slot). Keep that rewrite idempotent: rewrite the cast child from table slot to
+    // the current split's file slot, and drop the cast when the current split no longer needs it.
+    if (is_cast_expr(expr) && expr->get_num_children() == 1) {
+        const auto& child = expr->children()[0];
+        if (child->is_slot_ref()) {
+            const auto* slot_ref = assert_cast<const VSlotRef*>(child.get());
+            const auto rewrite_it = global_to_file_slot.find(slot_ref_global_index(*slot_ref));
+            if (rewrite_it != global_to_file_slot.end() &&
+                expr->data_type()->equals(*rewrite_it->second.table_type)) {
+                auto rewritten_child =
+                        create_file_slot_ref(*slot_ref, rewrite_it->second, rewrite_context);
+                if (rewrite_it->second.file_type->equals(*rewrite_it->second.table_type)) {
+                    return rewritten_child;  // types now match: drop the cast
+                }
+                if (needs_complex_file_slot_cast(rewrite_it->second.file_type,
+                                                 rewrite_it->second.table_type)) {
+                    *can_localize = false;
+                    return expr;
+                }
+                expr->set_children({std::move(rewritten_child)});  // keep the cast, swap its child
+                return expr;
+            }
+        }
+    }
+
+    VExprSPtrs rewritten_children;
+    rewritten_children.reserve(expr->children().size());
+    for (const auto& child : expr->children()) {
+        rewritten_children.push_back(rewrite_table_expr_to_file_expr(
+                child, global_to_file_slot, filter_mappings, rewrite_context, can_localize));
+    }
+    expr->set_children(std::move(rewritten_children));  // mutates expr in place
+    return expr;
+}
+
+static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id";
+static constexpr const char* ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER = "_last_updated_sequence_number";
+static constexpr int32_t ROW_LINEAGE_ROW_ID_FIELD_ID = 2147483540;  // 2^31 - 108
+static constexpr int32_t ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER_FIELD_ID = 2147483539;  // 2^31 - 109
+
+static TableVirtualColumnType row_lineage_virtual_column_type(const std::string& column_name) {
+    // Row-lineage columns are recognised here purely by an exact match on the column name.
+    if (column_name == ROW_LINEAGE_ROW_ID) {
+        return TableVirtualColumnType::ROW_ID;
+    }
+    const bool is_last_seq = column_name == ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER;
+    return is_last_seq ? TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER
+                       : TableVirtualColumnType::INVALID;
+}
+
+static TableVirtualColumnType row_lineage_virtual_column_type_by_field_id(
+        const ColumnDefinition& column) {
+    // A column without a field-id identifier is never classified as a row-lineage column.
+    if (!column.has_identifier_field_id()) {
+        return TableVirtualColumnType::INVALID;
+    }
+    const auto field_id = column.get_identifier_field_id();
+    if (field_id == ROW_LINEAGE_ROW_ID_FIELD_ID) {
+        return TableVirtualColumnType::ROW_ID;
+    }
+    const bool is_last_seq = field_id == ROW_LINEAGE_LAST_UPDATED_SEQ_NUMBER_FIELD_ID;
+    return is_last_seq ? TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER
+                       : TableVirtualColumnType::INVALID;
+}
+
+static TableVirtualColumnType row_lineage_virtual_column_type(const ColumnDefinition& column,
+                                                              TableColumnMappingMode mode) {
+    switch (mode) {
+    case TableColumnMappingMode::BY_NAME:
+    case TableColumnMappingMode::BY_INDEX:  // name and index modes both detect by column name
+        return row_lineage_virtual_column_type(column.name);
+    case TableColumnMappingMode::BY_FIELD_ID:  // field-id mode detects by identifier field id
+        return row_lineage_virtual_column_type_by_field_id(column);
+    }
+    return TableVirtualColumnType::INVALID;  // reached only for a value outside the cases above
+}
+
+// Returns true when the current file type is not the exact nested type the scan should expose,
+// i.e. the projected file-side type/projection must be rebuilt. This is independent of whether
+// TableReader later rematerializes the value. Precondition: mapping.file_type is non-null.
+static bool needs_projected_file_type_rebuild(const ColumnMapping& mapping) {
+    DORIS_CHECK(mapping.file_type != nullptr);  // validate before the dereference just below
+    if (!is_complex_type(mapping.file_type->get_primitive_type())) {
+        return false;
+    }
+    if (mapping.child_mappings.empty()) {
+        return false;
+    }
+    DORIS_CHECK(mapping.table_type != nullptr);
+    if (remove_nullable(mapping.file_type)->get_primitive_type() !=
+        remove_nullable(mapping.table_type)->get_primitive_type()) {
+        return true;
+    }
+    if (!mapping.table_type->equals(*mapping.file_type)) {
+        return true;
+    }
+    for (const auto& child_mapping : mapping.child_mappings) {
+        // Rename-only child mappings do not change the file-side projected shape. If field-id
+        // matching maps table child `renamed_b` to file child `b`, the file reader can still expose
+        // the original file type as long as child count/order/types are unchanged.
+        if (!child_mapping.file_local_id.has_value() ||
+            needs_projected_file_type_rebuild(child_mapping)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static std::optional<size_t> file_child_ordinal_in_scan_type(const ColumnMapping& mapping,
+                                                             const ColumnMapping& child_mapping) {
+    if (!child_mapping.file_local_id.has_value()) {
+        return std::nullopt;  // the child has no file-side id to look for
+    }
+    // Search the projected children when there are any, otherwise the original file children.
+    const bool use_projected = !mapping.projected_file_children.empty();
+    const auto& candidates =
+            use_projected ? mapping.projected_file_children : mapping.original_file_children;
+    for (size_t ordinal = 0; ordinal < candidates.size(); ++ordinal) {
+        if (candidates[ordinal].file_local_id() == *child_mapping.file_local_id) {
+            return ordinal;
+        }
+    }
+    return std::nullopt;
+}
+
+static bool needs_complex_rematerialize(const ColumnMapping& mapping) {
+    if (mapping.child_mappings.empty()) {
+        return false;  // no child mappings, so nothing to re-arrange
+    }
+    const bool both_types_known = mapping.table_type != nullptr && mapping.file_type != nullptr;
+    if (!both_types_known || !mapping.table_type->equals(*mapping.file_type)) {
+        return true;
+    }
+    for (size_t idx = 0; idx < mapping.child_mappings.size(); ++idx) {
+        const auto& child = mapping.child_mappings[idx];
+        const auto ordinal = file_child_ordinal_in_scan_type(mapping, child);
+        if (!ordinal.has_value() || *ordinal != idx || needs_complex_rematerialize(child)) {
+            return true;  // child absent, at another ordinal, or itself needs rematerializing
+        }
+        const bool child_types_known = child.table_type != nullptr && child.file_type != nullptr;
+        if (child_types_known && !child.table_type->equals(*child.file_type)) {
+            return true;  // child is in place but its table/file types differ
+        }
+    }
+    return false;
+}
+
+static bool mapping_can_use_file_column_directly(const ColumnMapping& mapping) {
+    if (mapping.table_type == nullptr || mapping.file_type == nullptr) {
+        return false;
+    }
+    if (!mapping.table_type->equals(*mapping.file_type)) {
+        // Unequal types are tolerated only when both sides are TIMESTAMPTZ (nullability ignored).
+        const auto table_kind = remove_nullable(mapping.table_type)->get_primitive_type();
+        const auto file_kind = remove_nullable(mapping.file_type)->get_primitive_type();
+        if (table_kind != TYPE_TIMESTAMPTZ || file_kind != TYPE_TIMESTAMPTZ) {
+            return false;
+        }
+    }
+    return !needs_complex_rematerialize(mapping);
+}
+
+static const ColumnDefinition* find_file_child_for_mapping(const ColumnDefinition& table_child,
+                                                           const ColumnDefinition& file_parent,
+                                                           TableColumnMappingMode mode,
+                                                           size_t table_child_idx,
+                                                           bool allow_ordinal_fallback) {
+    const auto file_parent_type = remove_nullable(file_parent.type)->get_primitive_type();
+    switch (file_parent_type) {
+    case TYPE_ARRAY:  // ARRAY: the single file child is returned regardless of table_child
+        DORIS_CHECK(file_parent.children.size() == 1);
+        return &file_parent.children[0];
+    case TYPE_MAP:  // MAP: file child 0 is the key, file child 1 is the value
+        DORIS_CHECK(file_parent.children.size() == 2);
+        if (table_child.name == "key") {
+            return &file_parent.children[0];
+        }
+        if (table_child.name == "value") {
+            return &file_parent.children[1];
+        }
+        if (table_child.local_id == 0 || table_child.local_id == 1) {  // else index by local id
+            return &file_parent.children[table_child.local_id];
+        }
+        return nullptr;  // neither name nor local id identifies a map child
+    default:
+        // Hive BY_INDEX is a top-level column matching rule. Once a complex root is selected by
+        // file position, nested struct children follow Hive reader's historical name matching
+        // semantics; their integer identifiers can be field ids, not file positions.
+        const auto nested_mode =
+                mode == TableColumnMappingMode::BY_INDEX ? TableColumnMappingMode::BY_NAME : mode;
+        if (const auto* file_child =
+                    matcher_for_mode(nested_mode).find(table_child, file_parent.children);
+            file_child != nullptr) {
+            return file_child;
+        }
+        if (allow_ordinal_fallback && mode == TableColumnMappingMode::BY_FIELD_ID &&
+            !table_child.has_identifier_field_id()) {
+            // Synthetic children are derived from the table DataType when nested ColumnDefinition
+            // metadata has been pruned away. They do not carry Iceberg field ids, so try a name
+            // match before falling back to ordinal order. Example:
+            //   table value type: Struct(age, full_name, gender)
+            //   old file value:   Struct(name, age)
+            // Name matching keeps `age -> age`; the later unused-child fallback can then map the
+            // renamed `full_name -> name` instead of consuming `age` twice.
+            if (const auto* file_child = NameMatcher().find(table_child, file_parent.children);
+                file_child != nullptr) {
+                return file_child;
+            }
+        }
+        // Some callers only carry the full complex DataType for a projected table column, without
+        // expanded nested ColumnDefinitions. In that case we can still preserve full materialization
+        // by walking table/file struct fields by ordinal. This is a fallback only: explicit
+        // ColumnDefinition children keep using the requested table-format matching rule, which is
+        // required for precise schema evolution.
+        if (allow_ordinal_fallback && table_child_idx < file_parent.children.size()) {
+            return &file_parent.children[table_child_idx];
+        }
+        return nullptr;  // no matcher hit and no usable ordinal fallback
+    }
+}
+
+static ColumnDefinition synthetic_child_definition(const std::string& name, DataTypePtr type,
+                                                   int32_t local_id) {
+    ColumnDefinition synthesized;
+    synthesized.name = name;
+    synthesized.type = std::move(type);
+    synthesized.local_id = local_id;
+    synthesized.identifier = Field::create_field<TYPE_STRING>(name);  // string identifier = name
+    return synthesized;
+}
+
+static std::vector<ColumnDefinition> synthesize_complex_children_from_type(
+        const DataTypePtr& type) {
+    // Derives synthetic child definitions from the DataType alone. A null type and any
+    // non-complex type produce an empty vector.
+    std::vector<ColumnDefinition> out;
+    if (type == nullptr) {
+        return out;
+    }
+    // Nullability of the root is stripped before deciding how to expand it.
+    const auto bare = remove_nullable(type);
+    const auto kind = bare->get_primitive_type();
+    if (kind == TYPE_ARRAY) {
+        // ARRAY: one child named "element" with local id 0.
+        const auto* as_array = assert_cast<const DataTypeArray*>(bare.get());
+        out.push_back(synthetic_child_definition("element", as_array->get_nested_type(), 0));
+    } else if (kind == TYPE_MAP) {
+        // MAP: "key" (local id 0) followed by "value" (local id 1).
+        const auto* as_map = assert_cast<const DataTypeMap*>(bare.get());
+        out.push_back(synthetic_child_definition("key", as_map->get_key_type(), 0));
+        out.push_back(synthetic_child_definition("value", as_map->get_value_type(), 1));
+    } else if (kind == TYPE_STRUCT) {
+        // STRUCT: one child per element, named after it, with the ordinal as local id.
+        const auto* as_struct = assert_cast<const DataTypeStruct*>(bare.get());
+        const size_t field_count = as_struct->get_elements().size();
+        out.reserve(field_count);
+        for (size_t ordinal = 0; ordinal < field_count; ++ordinal) {
+            out.push_back(synthetic_child_definition(as_struct->get_element_name(ordinal),
+                                                     as_struct->get_element(ordinal),
+                                                     cast_set<int32_t>(ordinal)));
+        }
+    }
+    // Every other primitive type falls through with nothing synthesized.
+    return out;
+}
+
+static bool has_table_child_named(const std::vector<ColumnDefinition>& children,
+                                  std::string_view name) {
+    // Exact, case-sensitive comparison against each child's `name`.
+    const auto is_match = [name](const auto& c) { return std::string_view(c.name) == name; };
+    return std::ranges::any_of(children, is_match);
+}
+
+static void complete_required_complex_children_from_type(const DataTypePtr& type,
+                                                         std::vector<ColumnDefinition>* children) {
+    // Completes `children` in place with structural children that a pruned table projection
+    // dropped but recursive mapping still needs. `children` must not be null; a null `type` or
+    // any non-MAP type leaves the vector untouched.
+    DORIS_CHECK(children != nullptr);
+    if (type == nullptr) {
+        return;
+    }
+    const auto bare_type = remove_nullable(type);
+    // Only MAP ever gets a child added here. The other complex types are deliberately left
+    // alone:
+    //  - ARRAY has only one required structural child (`element`), so a non-empty projection is
+    //    already rooted at the element path.
+    //  - STRUCT children are real fields and must remain prunable. Completing missing struct
+    //    fields would turn `SELECT s.a` into a full-struct read and undo nested projection.
+    if (bare_type->get_primitive_type() != TYPE_MAP) {
+        return;
+    }
+    const auto* as_map = assert_cast<const DataTypeMap*>(bare_type.get());
+    // MAP key/value are structural children, not independently materializable table fields.
+    // A key-only projection can still be attached to a whole-map output root, for example:
+    //   SELECT * FROM t WHERE ARRAY_CONTAINS(MAP_KEYS(new_map_column), 'person5')
+    //
+    // In that shape the scanner keeps the value stream readable, but the table projection can
+    // carry only the key child. Add the missing value child so recursive mapping can evolve the
+    // value type instead of letting TableReader cast old/new value structs directly.
+    // Only the key-without-value shape is completed; other child combinations stay as given.
+    const bool key_listed = has_table_child_named(*children, "key");
+    const bool value_listed = has_table_child_named(*children, "value");
+    if (key_listed && !value_listed) {
+        children->push_back(synthetic_child_definition("value", as_map->get_value_type(), 1));
+    }
+}
+
+static Status validate_file_schema_children(const ColumnDefinition& file_field) {
+    if (file_field.type == nullptr) {
+        return Status::InternalError("File column '{}' has null type", file_field.name);
+    }
+    const auto bare_type = remove_nullable(file_field.type);
+    // Number of direct children the type dictates; stays empty for non-complex types.
+    std::optional<size_t> required_children;
+    switch (bare_type->get_primitive_type()) {
+    case TYPE_ARRAY:
+        required_children = 1;
+        break;
+    case TYPE_MAP:
+        required_children = 2;
+        break;
+    case TYPE_STRUCT:
+        required_children =
+                assert_cast<const DataTypeStruct*>(bare_type.get())->get_elements().size();
+        break;
+    default:
+        break;
+    }
+    const size_t actual_children = file_field.children.size();
+    // Non-complex columns, and complex columns with the dictated child count, are accepted.
+    if (!required_children.has_value() || actual_children == *required_children) {
+        return Status::OK();
+    }
+    return Status::InternalError(
+            "Malformed complex file schema for column '{}': type={}, expected_children={}, "
+            "actual_children={}",
+            file_field.name, file_field.type->get_name(), *required_children, actual_children);
+}
+
+static bool has_projected_file_children(const ColumnMapping& mapping) {
+    const auto& original = mapping.original_file_children;
+    const auto& projected = mapping.projected_file_children;
+    if (original.empty() || projected.empty()) {
+        return false;  // without both lists there is no projection to compare
+    }
+    // A different child count means the projected list is not the original one. With equal
+    // counts the projection still differs if any position holds a child with another
+    // file-local id.
+    bool differs = original.size() != projected.size();
+    for (size_t pos = 0; !differs && pos < original.size(); ++pos) {
+        differs = original[pos].file_local_id() != projected[pos].file_local_id();
+    }
+    return differs;
+}
+
+static bool needs_nested_file_projection(const ColumnMapping& mapping) {
+    // A node needs a nested projection when its own projected file children are missing or
+    // re-ordered relative to the original ones, or when any descendant mapping does.
+    bool needed = has_projected_file_children(mapping);
+    for (size_t idx = 0; !needed && idx < mapping.child_mappings.size(); ++idx) {
+        needed = needs_nested_file_projection(mapping.child_mappings[idx]);
+    }
+    return needed;
+}
+
+static Status build_complex_projection(const ColumnMapping& mapping, LocalColumnIndex* projection);
+
+// Derives the file-side children and type that remain after pruning a complex column. E.g. for a
+// struct `s` with file children `id` and `name`, keeping only `s.name` makes the file reader
+// expose `STRUCT<name ...>`.
+static Status rebuild_projected_file_children_and_type(
+        const DataTypePtr& file_type, const std::vector<ColumnDefinition>& original_file_children,
+        const std::vector<ColumnMapping>& child_mappings,
+        std::vector<ColumnDefinition>* projected_file_children, DataTypePtr* projected_type) {
+    DORIS_CHECK(file_type != nullptr);
+    DORIS_CHECK(projected_file_children != nullptr);
+    DORIS_CHECK(projected_type != nullptr);
+    LocalColumnIndex root_projection = LocalColumnIndex::partial_local(-1);
+    root_projection.children.reserve(child_mappings.size());
+    for (const auto* present_child : present_child_mappings_in_file_order(child_mappings)) {
+        DORIS_CHECK(present_child->file_local_id.has_value());
+        LocalColumnIndex nested_projection;
+        RETURN_IF_ERROR(build_complex_projection(*present_child, &nested_projection));
+        root_projection.children.push_back(std::move(nested_projection));
+    }
+
+    ColumnDefinition unprojected_root;
+    unprojected_root.type = file_type;
+    unprojected_root.children = original_file_children;
+    ColumnDefinition projected_root;
+    RETURN_IF_ERROR(project_column_definition(unprojected_root, root_projection, &projected_root));
+    *projected_file_children = std::move(projected_root.children);
+    *projected_type = std::move(projected_root.type);
+    return Status::OK();
+}
+
+// Build the complex column projection according to the ColumnMapping which is re-ordered by the
+// file-schema's order.
+//
+// For MAP, a partial projection represents value-subtree pruning only. The key child is not a
+// projected output shape; file readers still read full keys to construct ColumnMap offsets and keep
+// key semantics unchanged. If a caller tries to project only/prune the key child, the common schema
+// projection helper rejects it.
+static Status build_complex_projection(const ColumnMapping& mapping, LocalColumnIndex* projection) {
+    if (projection == nullptr) {
+        return Status::InvalidArgument("projection is null");
+    }
+    DORIS_CHECK(mapping.file_local_id.has_value());
+    *projection = LocalColumnIndex::local(*mapping.file_local_id);
+    projection->project_all_children = mapping.child_mappings.empty();  // leaf: whole subtree
+    projection->children.clear();
+    const auto present_children = present_child_mappings_in_file_order(mapping.child_mappings);
+    if (!projection->project_all_children && present_children.empty()) {
+        // All requested table children under this complex node are missing/default-only. The file
+        // reader cannot expose an empty complex projection, but TableReader can still rematerialize
+        // the table shape from a full file subtree and fill the missing children with defaults.
+        projection->project_all_children = true;
+        return Status::OK();
+    }
+    for (const auto* child_mapping : present_children) {  // recurse into each present child
+        LocalColumnIndex child_projection;
+        RETURN_IF_ERROR(build_complex_projection(*child_mapping, &child_projection));
+        projection->children.push_back(std::move(child_projection));
+    }
+    if (!projection->project_all_children && projection->children.empty()) {  // defensive check
+        return Status::NotSupported("Projection for complex column {} contains no file children",
+                                    mapping.file_column_name);
+    }
+    return Status::OK();
+}
+
+using FilterProjectionMap = std::map<LocalColumnId, LocalColumnIndex>;  // keyed by column_id()
+
+// Re-derive mapping->file_type and projected_file_children by applying `projection` to the
+// original file schema, then refresh is_trivial. Non-complex roots return before projecting.
+static Status apply_projection_to_mapping_file_type(const LocalColumnIndex& projection,
+                                                    ColumnMapping* mapping) {
+    DORIS_CHECK(mapping != nullptr);
+    if (mapping->original_file_type == nullptr) {
+        mapping->original_file_type = mapping->file_type;  // remember the unprojected type once
+    }
+    const auto& source_type = mapping->original_file_type;
+    if (source_type == nullptr ||
+        !is_complex_type(remove_nullable(source_type)->get_primitive_type())) {
+        return Status::OK();
+    }
+    ColumnDefinition unprojected;
+    unprojected.type = source_type;
+    unprojected.children = mapping->original_file_children;
+    ColumnDefinition projected;
+    RETURN_IF_ERROR(project_column_definition(unprojected, projection, &projected));
+    mapping->file_type = std::move(projected.type);
+    mapping->projected_file_children = std::move(projected.children);
+    mapping->is_trivial = mapping_can_use_file_column_directly(*mapping);
+    return Status::OK();
+}
+
+static Status merge_filter_projection(const FilterProjectionMap* filter_projections,
+                                      LocalColumnIndex* projection) {
+    DORIS_CHECK(projection != nullptr);
+    if (filter_projections == nullptr) {
+        return Status::OK();
+    }
+    // Fold predicate-only nested paths into the root projection that is about to be scanned; a
+    // root without a recorded filter projection is left as it is.
+    // Example: `SELECT s.a WHERE s.b > 1` first builds the output projection `s -> a` from
+    // ColumnMapping, while build_nested_struct_filter_projection_map() records `s -> b`. This
+    // merge produces one file scan projection `s -> a,b`.
+    if (const auto found = filter_projections->find(projection->column_id());
+        found != filter_projections->end()) {
+        RETURN_IF_ERROR(merge_local_column_index(projection, found->second));
+    }
+    return Status::OK();
+}
+
+static bool table_root_is_map(const ColumnMapping& mapping) {
+    // A mapping without a table type cannot be classified, so it is reported as "not a map".
+    const auto& root_type = mapping.table_type;
+    const bool has_type = root_type != nullptr;
+    return has_type && remove_nullable(root_type)->get_primitive_type() == TYPE_MAP;
+}
+
+static Status add_scan_column(FileScanRequest* file_request, ColumnMapping* mapping,
+                              bool is_predicate_column, bool force_full_complex_scan_projection,
+                              const FilterProjectionMap* filter_projections = nullptr) {
+    // Start from a top-level projection of the mapped file column.
+    LocalColumnIndex projection =
+            LocalColumnIndex::top_level(LocalColumnId(mapping->file_local_id.value()));
+    const bool allow_nested_projection = !force_full_complex_scan_projection;
+    // Columnar readers can turn a complex mapping into a nested file projection, but
+    // row-oriented readers must scan the full top-level complex field because all children are
+    // encoded in the same text cell.
+    if (allow_nested_projection && needs_nested_file_projection(*mapping)) {
+        RETURN_IF_ERROR(build_complex_projection(*mapping, &projection));
+    }
+    if (allow_nested_projection && is_predicate_column) {
+        DCHECK(filter_projections != nullptr);
+        // A projected complex root that is also used by a predicate keeps the output projection
+        // built above and then gains the predicate-only children. For
+        // `SELECT s.a WHERE s.b > 1`, build_complex_projection() produces `s -> a` and
+        // merge_filter_projection() adds `s -> b`, so the predicate column reads both children.
+        RETURN_IF_ERROR(merge_filter_projection(filter_projections, &projection));
+    }
+    FileScanRequestBuilder builder(file_request);
+    return is_predicate_column ? builder.add_predicate_column(std::move(projection))
+                               : builder.add_non_predicate_column(std::move(projection));
+}
+
+static const LocalColumnIndex* find_scan_projection(
+        const std::vector<LocalColumnIndex>& scan_columns, LocalColumnId file_column_id) {
+    const auto last = scan_columns.end();
+    const auto hit = std::find_if(scan_columns.begin(), last, [&](const LocalColumnIndex& entry) {
+        return entry.column_id() == file_column_id;
+    });
+    return hit != last ? &*hit : nullptr;  // first entry rooted at file_column_id, if any
+}
+
+// Feeds the final scan projection of one root file column back into its ColumnMapping, so that
+// mapping.file_type/projected_file_children describe the projected shape rather than the original
+// file schema.
+//
+// Example: for `SELECT s.a WHERE s.b > 1`, add_scan_column() keeps only one predicate scan
+// projection `s -> a,b`. Applying that projection changes the mapping's file type from the full
+// file struct `s<a,b,c>` to the projected file struct `s<a,b>`, so later filter rewrite and
+// TableReader final materialization use the same column shape as the file-local block.
+static Status apply_scan_projection_to_mapping_file_type(const FileScanRequest& file_request,
+                                                         ColumnMapping* mapping) {
+    DORIS_CHECK(mapping != nullptr);
+    DORIS_CHECK(mapping->file_local_id.has_value());
+    const LocalColumnId root_id(*mapping->file_local_id);
+    // A predicate-column entry wins over a non-predicate one for the same root. NOTE(review): the
+    // de-duplication of the non-predicate entry is not visible in add_scan_column(); confirm it.
+    const auto* scan_projection = find_scan_projection(file_request.predicate_columns, root_id);
+    if (scan_projection == nullptr) {
+        scan_projection = find_scan_projection(file_request.non_predicate_columns, root_id);
+    }
+    DORIS_CHECK(scan_projection != nullptr);
+    return apply_projection_to_mapping_file_type(*scan_projection, mapping);
+}
+
+// Build extra scan projections required only by row-level filters on nested struct children.
+//
+// Example: for `SELECT s.a FROM t WHERE s.b.c > 1`, the output projection may only contain `s.a`,
+// but the file reader must also read `s.b.c` to evaluate the predicate. This function collects the
+// table-side filter path, resolves it through ColumnMapping first, and records the corresponding
+// file-side projection in filter_projections. This keeps renamed fields consistent across the scan
+// projection, row-level conjunct rewrite, and nested predicate pruning. Example:
+//   table filter path: s -> renamed_b -> c
+//   old file path:     s -> b -> c
+//   recorded path:     s -> b -> c
+// When add_scan_column() adds the same root as a predicate column, it rebuilds that root from the
+// output mapping, merges this filter-only projection into it, and removes the duplicate
+// non-predicate root entry.
+static Status build_nested_struct_filter_projection_map(
+        const std::vector<TableFilter>& table_filters, const std::vector<ColumnMapping>& mappings,
+        FilterProjectionMap* filter_projections) {
+    DORIS_CHECK(filter_projections != nullptr);
+    filter_projections->clear();  // entries left by an earlier call are discarded
+    for (const auto& table_filter : table_filters) {
+        if (table_filter.conjunct == nullptr) {
+            continue;  // filter carries no conjunct to inspect
+        }
+        // Collect all nested struct paths in the table filter. For example, for
+        // `s.id > 5 AND element_at(s, 'renamed_name') = 'abc'`, collect the table paths
+        // `s -> id` and `s -> renamed_name`, then resolve each one to its file-side projection.
+        std::vector<NestedStructPath> paths;
+        collect_nested_struct_paths(table_filter.conjunct->root(), &paths);
+        for (const auto& path : paths) {
+            auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) {
+                return mapping.global_index == path.root_global_index;
+            });
+            if (mapping_it == mappings.end() || !mapping_it->file_local_id.has_value() ||
+                path.selectors.empty()) {
+                continue;  // unmapped root, root without a file column, or no nested selector
+            }
+
+            ResolvedNestedStructPath resolved;
+            LocalColumnIndex root_projection;
+            if (!resolve_nested_struct_path_for_file(path, mappings, &resolved)) {
+                if (!table_root_is_map(*mapping_it)) {
+                    continue;  // unresolved non-map roots add no scan projection
+                }
+                // Direct map value filters such as `m.value.a > 1` need the value leaf for row
+                // evaluation even when the query only projects another value child. This is only a
+                // scan projection fallback; complex map/array expressions are still not rewritten
+                // into file-local conjuncts.
+                LocalColumnIndex child_projection;
+                RETURN_IF_ERROR(build_file_child_projection_from_schema(
+                        mapping_it->original_file_children, path.selectors, &child_projection));
+                if (child_projection.local_id() < 0) {
+                    continue;  // presumably "no such file child" - confirm
+                }
+                root_projection = LocalColumnIndex::partial_local(*mapping_it->file_local_id);
+                root_projection.children.push_back(std::move(child_projection));
+            } else {
+                root_projection = std::move(resolved.file_projection);
+            }
+            auto filter_projection_it = filter_projections->find(root_projection.column_id());
+            if (filter_projection_it == filter_projections->end()) {
+                filter_projections->emplace(root_projection.column_id(),
+                                            std::move(root_projection));
+                continue;  // first projection recorded for this root
+            }
+            RETURN_IF_ERROR(
+                    merge_local_column_index(&filter_projection_it->second, root_projection));
+        }
+    }
+    return Status::OK();
+}
+
+// Point the mapping's output projection at `block_position` in the file-local block; a Cast to the
+// table type is added unless the mapping is trivial or needs_complex_rematerialize() holds.
+static void rebuild_projection(ColumnMapping* mapping, LocalIndex block_position) {
+    DORIS_CHECK(mapping->file_local_id.has_value());
+    const auto slot_position = cast_set<int>(block_position.value());
+    auto file_slot = VSlotRef::create_shared(slot_position, slot_position, -1, mapping->file_type,
+                                             mapping->file_column_name);
+    if (mapping->is_trivial || needs_complex_rematerialize(*mapping)) {
+        mapping->projection = VExprContext::create_shared(std::move(file_slot));
+        return;
+    }
+    auto cast_expr = Cast::create_shared(mapping->table_type);
+    cast_expr->add_child(std::move(file_slot));
+    mapping->projection = VExprContext::create_shared(cast_expr);
+}
+
+// Build the table-slot -> file-slot rewrite map from the localized filter entries. A mapping is
+// included only when its filter entry exists and is local; constant and unset targets are left
+// out so they stay above the file reader.
+static std::map<GlobalIndex, FileSlotRewriteInfo> build_file_slot_rewrite_map(
+        const std::vector<ColumnMapping>& mappings,
+        const std::map<GlobalIndex, FilterEntry>& filter_entries) {
+    std::map<GlobalIndex, FileSlotRewriteInfo> rewrite_map;
+    for (const auto& column_mapping : mappings) {
+        const auto found = filter_entries.find(column_mapping.global_index);
+        const bool is_local_target = found != filter_entries.end() && found->second.is_local();
+        if (is_local_target) {
+            DORIS_CHECK(column_mapping.file_local_id.has_value());
+            FileSlotRewriteInfo slot_info {.block_position = found->second.local_index().value(),
+                                           .file_type = column_mapping.file_type,
+                                           .table_type = column_mapping.table_type,
+                                           .file_column_name = column_mapping.file_column_name};
+            rewrite_map.emplace(column_mapping.global_index, std::move(slot_info));
+        }
+    }
+    return rewrite_map;
+}
+
+Status TableColumnMapper::_create_by_index_mapping(const ColumnDefinition& table_column,
+                                                   const std::vector<ColumnDefinition>& file_schema,
+                                                   ColumnMapping* mapping) {
+    DORIS_CHECK(mapping != nullptr);
+    DORIS_CHECK(!table_column.is_partition_key);
+
+    // BY_INDEX contract: a TYPE_INT `ColumnDefinition::identifier` is the 0-based position of the
+    // column inside `file_schema`; FE stores the physical file position of every non-partition
+    // projected column there. Reading the identifier as a position gives us:
+    //   - sparse projection: only a subset of the file columns is read (for example only `_col2`
+    //     and `_col4`);
+    //   - column reordering: the table column order may differ from the file column order;
+    //   - no many-to-one mapping: FE must guarantee that each file position is referenced by at
+    //     most one table column.
+    const auto file_position = table_column.get_identifier_position();
+    const bool position_in_file =
+            file_position >= 0 && static_cast<size_t>(file_position) < file_schema.size();
+    if (!position_in_file) {
+        // The position is outside the file schema, so the file does not contain this column.
+        // Use the missing-column path of schema evolution: an explicit default becomes a constant
+        // mapping; otherwise the mapping stays empty (`file_local_id` remains `nullopt`) and the
+        // upper finalize stage fills NULL/default values.
+        if (table_column.default_expr != nullptr) {
+            _set_constant_mapping(mapping, table_column.default_expr);
+        }
+        return Status::OK();
+    }
+
+    // The position is valid: map positionally. The file column name (for example `_col0`) is
+    // intentionally ignored here.
+    return _create_direct_mapping(table_column, file_schema[static_cast<size_t>(file_position)],
+                                  mapping);
+}
+
+// Bind `mapping` to the constant `expr`: register it in `_constant_map` and mark filters CONSTANT.
+void TableColumnMapper::_set_constant_mapping(ColumnMapping* mapping, VExprContextSPtr expr) {
+    DORIS_CHECK(mapping != nullptr);
+    DORIS_CHECK(expr != nullptr);
+    mapping->default_expr = std::move(expr);
+    mapping->filter_conversion = FilterConversionType::CONSTANT;
+    ConstantEntry constant_entry {.global_index = mapping->global_index,
+                                  .expr = mapping->default_expr,
+                                  .type = mapping->table_type};
+    mapping->constant_index = _constant_map.add(std::move(constant_entry));
+}
+
+Status TableColumnMapper::_create_mapping_for_column(const ColumnDefinition& table_column,
+                                                     GlobalIndex global_index,
+                                                     ColumnMapping* mapping) {
+    DORIS_CHECK(mapping != nullptr);
+    *mapping = ColumnMapping {};
+    mapping->global_index = global_index;
+    mapping->table_column_name = table_column.name;
+    mapping->table_type = table_column.type;
+    const auto row_lineage_type = row_lineage_virtual_column_type(table_column, _options.mode);
+    const bool is_row_lineage_column = row_lineage_type != TableVirtualColumnType::INVALID;
+    const auto* partition_value = find_partition_value(table_column, _partition_values);
+    // Candidate sources are tried in priority order; the first branch that applies wins.
+    if (table_column.is_partition_key && partition_value != nullptr) {
+        // A partition value is a per-split constant and takes precedence over any default.
+        _set_constant_mapping(mapping, VExprContext::create_shared(VLiteral::create_shared(
+                                               mapping->table_type, *partition_value)));
+    } else if (_options.mode == TableColumnMappingMode::BY_INDEX &&
+               !table_column.is_partition_key && table_column.has_identifier_field_id()) {
+        // In BY_INDEX mode ColumnDefinition::identifier is the physical file position.
+        RETURN_IF_ERROR(_create_by_index_mapping(table_column, _file_schema, mapping));
+    } else if (const auto* file_field = _find_file_field(table_column, _file_schema)) {
+        // The column exists physically in the file.
+        RETURN_IF_ERROR(_create_direct_mapping(table_column, *file_field, mapping));
+        if (is_row_lineage_column) {
+            // Iceberg v3 rewritten files may physically contain row lineage metadata fields.
+            // File non-null values must be preserved, while file NULLs still inherit from data file
+            // metadata in IcebergTableReader. Therefore the mapping has a real file source plus a
+            // virtual post-materialization step, and filters must wait for finalize output.
+            mapping->virtual_column_type = row_lineage_type;
+            mapping->filter_conversion = FilterConversionType::FINALIZE_ONLY;
+        }
+    } else if (is_row_lineage_column) {
+        // Iceberg row lineage metadata fields are optional in data files. Missing fields are exposed
+        // as all-NULL table columns first; IcebergTableReader fills inherited values only when the
+        // split carries first_row_id / last_updated_sequence_number metadata.
+        // FE may attach a default_expr to these hidden metadata columns, but the Iceberg v3
+        // inheritance rule must take precedence over the generic missing-column default path.
+        mapping->virtual_column_type = row_lineage_type;
+    } else if (table_column.name == BeConsts::ICEBERG_ROWID_COL) {
+        // Doris internal Iceberg row locator is never a physical Iceberg data column. It is built
+        // from file path, row position and partition metadata for delete/update/merge.
+        mapping->virtual_column_type = TableVirtualColumnType::ICEBERG_ROWID;
+    } else if (table_column.default_expr != nullptr) {
+        // Missing schema-evolution column with an explicit default expression.
+        _set_constant_mapping(mapping, table_column.default_expr);
+    } else if (table_column.is_partition_key) {
+        return Status::InvalidArgument(
+                "Table column '{}' (global_index={}) does not have a matching partition value",
+                table_column.name, mapping->global_index.value());
+    }
+    return Status::OK();
+}
+
+Status TableColumnMapper::_create_hidden_filter_mapping(const ColumnDefinition& table_column,
+                                                        GlobalIndex global_index,
+                                                        ColumnMapping* mapping) {
+    auto status = _create_mapping_for_column(table_column, global_index, mapping);
+    // A failed attempt may leave `mapping` partially filled, so accept it only when status is OK.
+    if (status.ok() &&
+        (mapping->file_local_id.has_value() || mapping->constant_index.has_value() ||
+         mapping->virtual_column_type != TableVirtualColumnType::INVALID)) {
+        return Status::OK();
+    }
+    if (_options.mode == TableColumnMappingMode::BY_NAME) {
+        return status;
+    }
+    // Predicate-only slot refs carry the table name/type but neither the BY_FIELD_ID field id nor
+    // the BY_INDEX file position, so only hidden filter localization falls back to name matching.
+    const auto* file_field =
+            matcher_for_mode(TableColumnMappingMode::BY_NAME).find(table_column, _file_schema);
+    if (file_field == nullptr) {
+        return status;
+    }
+    ColumnMapping fallback_mapping;
+    fallback_mapping.global_index = global_index;
+    fallback_mapping.table_column_name = table_column.name;
+    fallback_mapping.table_type = table_column.type;
+    RETURN_IF_ERROR(_create_direct_mapping(table_column, *file_field, &fallback_mapping));
+    *mapping = std::move(fallback_mapping);
+    return Status::OK();
+}
+
+Status TableColumnMapper::_build_hidden_filter_mappings(
+        const std::vector<TableFilter>& table_filters) {
+    _hidden_mappings.clear();
+
+    std::map<GlobalIndex, ColumnDefinition> slot_columns;
+    for (const auto& filter : table_filters) {
+        if (filter.conjunct == nullptr) {
+            continue;
+        }
+        collect_top_level_slot_columns(filter.conjunct->root(), &slot_columns);
+    }
+
+    // TableColumnPredicates only carry GlobalIndex and predicate objects. They lack the top-level
+    // column name/type that a hidden mapping needs, so a predicate-only column can be
+    // hidden-mapped only when the same root slot also appears in a conjunct.
+    for (const auto& [slot_index, slot_column] : slot_columns) {
+        // Columns that the projection already maps do not need a hidden mapping.
+        if (_find_mapping(slot_index) == nullptr) {
+            ColumnMapping hidden;
+            RETURN_IF_ERROR(_create_hidden_filter_mapping(slot_column, slot_index, &hidden));
+            if (hidden.file_local_id.has_value() || hidden.constant_index.has_value() ||
+                hidden.virtual_column_type != TableVirtualColumnType::INVALID) {
+                _hidden_mappings.push_back(std::move(hidden));
+            }
+        }
+    }
+    return Status::OK();
+}
+
+Status TableColumnMapper::create_mapping(const std::vector<ColumnDefinition>& projected_columns,
+                                         const std::map<std::string, Field>& partition_values,
+                                         const std::vector<ColumnDefinition>& file_schema) {
+    clear();
+    _partition_values = partition_values;
+    _file_schema = file_schema;
+    for (size_t position = 0; position < projected_columns.size(); ++position) {
+        ColumnMapping built;
+        const auto& table_column = projected_columns[position]; // position == global index
+        RETURN_IF_ERROR(_create_mapping_for_column(table_column, GlobalIndex(position), &built));
+        _mappings.push_back(std::move(built));
+    }
+    return Status::OK();
+}
+
+std::vector<ColumnMapping> TableColumnMapper::_filter_visible_mappings() const {
+    std::vector<ColumnMapping> combined; // copies: projected mappings first, then hidden ones
+    combined.reserve(_mappings.size() + _hidden_mappings.size());
+    combined.insert(combined.end(), _mappings.cbegin(), _mappings.cend());
+    combined.insert(combined.end(), _hidden_mappings.cbegin(), _hidden_mappings.cend());
+    return combined;
+}
+
+Status TableColumnMapper::_build_filter_entries(const FileScanRequest& file_request) {
+    _filter_entries.clear(); // rebuilt below from both mapping lists, visited in place
+    const auto add_entry = [&](const ColumnMapping& mapping) { // entry stays unset by default
+        FilterEntry entry;
+        if (mapping.constant_index.has_value()) {
+            entry = FilterEntry::constant(*mapping.constant_index);
+        } else if (mapping.file_local_id.has_value() &&
+                   filter_conversion_has_local_source(mapping.filter_conversion)) {
+            const auto& pos = file_request.local_positions;
+            if (auto it = pos.find(LocalColumnId(*mapping.file_local_id)); it != pos.end()) {
+                entry = FilterEntry::local(it->second);
+            }
+        }
+        _filter_entries.emplace(mapping.global_index, entry);
+    };
+    std::ranges::for_each(_mappings, add_entry); // projected first: emplace keeps the first entry
+    std::ranges::for_each(_hidden_mappings, add_entry); // no ColumnMapping copies are made
+    return Status::OK();
+}
+
+Status TableColumnMapper::create_scan_request(
+        const std::vector<TableFilter>& table_filters,
+        const TableColumnPredicates& table_column_predicates,
+        const std::vector<ColumnDefinition>& projected_columns, FileScanRequest* file_request,
+        RuntimeState* runtime_state) {
+    DORIS_CHECK(file_request != nullptr);
+    // FileReader evaluates expressions against a file-local block. This mapper owns the
+    // table-column to file-column conversion, so it also owns the file-local block positions.
+    // Reset every output of this call before rebuilding it.
+    file_request->predicate_columns.clear();
+    file_request->non_predicate_columns.clear();
+    file_request->local_positions.clear();
+    file_request->conjuncts.clear();
+    file_request->delete_conjuncts.clear();
+    file_request->column_predicate_filters.clear();
+    _filter_entries.clear();
+    // 1. Build referenced non-predicate columns
+    for (size_t column_idx = 0; column_idx < projected_columns.size(); ++column_idx) {
+        const auto global_index = GlobalIndex(column_idx);
+        auto* mapping = _find_mapping(global_index);
+        if (mapping != nullptr && mapping->file_local_id.has_value()) {
+            // A file column can be read lazily as a non-predicate column only when it is not used
+            // by row-level expression filters. Single-column ColumnPredicate filters are pruning
+            // hints only and must not force row-level predicate materialization.
+            const bool used_by_filter =
+                    filter_conversion_has_local_source(mapping->filter_conversion) &&
+                    std::ranges::any_of(table_filters, [&](const auto& table_filter) {
+                        const auto& ids = table_filter.global_indices;
+                        return std::find(ids.begin(), ids.end(), global_index) != ids.end();
+                    });
+            if (!used_by_filter || !enable_lazy_materialization()) {
+                RETURN_IF_ERROR(add_scan_column(file_request, mapping, false,
+                                                force_full_complex_scan_projection()));
+            }
+        }
+    }
+    // 2. Build referenced predicate columns
+    // Hidden filter mappings must be built before localizing filters, so that they are localized
+    // together with the visible mappings and can be referenced by localized filter expressions.
+    RETURN_IF_ERROR(_build_hidden_filter_mappings(table_filters));
+    RETURN_IF_ERROR(
+            localize_filters(table_filters, table_column_predicates, file_request, runtime_state));
+    // 3. Rebuild output projection expressions for projected columns. localize_filters() has
+    // already applied the final scan projection to mapping.file_type/projected_file_children before
+    // rewriting filter expressions.
+    for (auto& mapping : _mappings) {
+        if (!mapping.file_local_id.has_value()) {
+            continue;
+        }
+        auto position_it =
+                file_request->local_positions.find(LocalColumnId(*mapping.file_local_id));
+        // Steps 1 and 2 are expected to have assigned a block position to every such column.
+        DORIS_CHECK(position_it != file_request->local_positions.end())
+                << file_request->local_positions.size() << " " << *mapping.file_local_id << " "
+                << mapping.file_column_name;
+        rebuild_projection(&mapping, position_it->second);
+    }
+    return Status::OK();
+}
+
+// Look up the projected-column mapping whose global index equals `global_index`.
+// Returns nullptr when no projected mapping matches.
+ColumnMapping* TableColumnMapper::_find_mapping(GlobalIndex global_index) {
+    const auto found = std::ranges::find_if(_mappings, [&](const ColumnMapping& candidate) {
+        return candidate.global_index == global_index;
+    });
+    return found == _mappings.end() ? nullptr : &*found;
+}
+
+// Projected mappings are searched first, then hidden ones; nullptr when neither list matches.
+ColumnMapping* TableColumnMapper::_find_filter_mapping(GlobalIndex global_index) {
+    auto* projected = _find_mapping(global_index);
+    if (projected != nullptr) {
+        return projected;
+    }
+    const auto hidden = std::ranges::find_if(_hidden_mappings, [&](const ColumnMapping& entry) {
+        return entry.global_index == global_index;
+    });
+    return hidden == _hidden_mappings.end() ? nullptr : &*hidden;
+}
+
+Status TableColumnMapper::localize_filters(const std::vector<TableFilter>& table_filters,
+                                           const TableColumnPredicates& table_column_predicates,
+                                           FileScanRequest* file_request,
+                                           RuntimeState* runtime_state) {
+    FilterProjectionMap filter_projections;
+    auto filter_mappings = _filter_visible_mappings();
+    RETURN_IF_ERROR(build_nested_struct_filter_projection_map(table_filters, filter_mappings,
+                                                              &filter_projections));
+    for (const auto& table_filter : table_filters) {
+        for (const auto& global_index : table_filter.global_indices) {
+            auto* mapping = _find_filter_mapping(global_index);
+            if (mapping == nullptr || !mapping->file_local_id.has_value() ||
+                !filter_conversion_has_local_source(mapping->filter_conversion)) {
+                continue; // this column has no local file source
+            }
+            RETURN_IF_ERROR(add_scan_column(file_request, mapping, enable_lazy_materialization(),
+                                            force_full_complex_scan_projection(),
+                                            &filter_projections));
+        }
+    }
+    // Rebuild the file type for every scan-local mapping before expression rewrite. Predicate-only
+    // hidden mappings must see the same projected file type as the file reader will produce.
+    for (auto& mapping : _mappings) {
+        if (mapping.file_local_id.has_value() &&
+            file_request->local_positions.contains(LocalColumnId(*mapping.file_local_id))) {
+            RETURN_IF_ERROR(apply_scan_projection_to_mapping_file_type(*file_request, &mapping));
+        }
+    }
+    for (auto& mapping : _hidden_mappings) {
+        if (mapping.file_local_id.has_value() &&
+            file_request->local_positions.contains(LocalColumnId(*mapping.file_local_id))) {
+            RETURN_IF_ERROR(apply_scan_projection_to_mapping_file_type(*file_request, &mapping));
+        }
+    }
+    RETURN_IF_ERROR(_build_filter_entries(*file_request));
+
+    // Build the complete table-slot rewrite map after all predicate columns have been assigned.
+    // This keeps expression localization independent from filter iteration order.
+    filter_mappings = _filter_visible_mappings();
+    const auto global_to_file_slot = build_file_slot_rewrite_map(filter_mappings, _filter_entries);
+    for (const auto& table_filter : table_filters) {
+        if (table_filter.conjunct != nullptr &&
+            table_filter_has_only_local_entries(table_filter, _filter_entries)) {
+            RewriteContext rewrite_context {.runtime_state = runtime_state};
+            VExprSPtr rewrite_root;
+            Status clone_status;
+            try {
+                clone_status = clone_table_expr_tree(table_filter.conjunct->root(), &rewrite_root);
+            } catch (const Exception& e) {
+                // Some table filters contain complex intermediate values, for example
+                // `element_at(MAP_VALUES(m)[1], 'age') > 30`. The current file-local rewrite only
+                // understands top-level slots and struct-element paths rooted at top-level slots;
+                // cloning such expressions can hit the generic TExpr complex-type limitation.
+                // Release builds leave them above TableReader, where Scanner evaluates the original
+                // table-level conjunct after final materialization; debug builds fail instead.
+#ifndef NDEBUG
+                return Status::InternalError(
+                        "Failed to clone table filter for file-local rewrite: {}, expr={}",
+                        e.to_string(), table_filter.conjunct->root()->debug_string());
+#else
+                continue;
+#endif
+            } catch (const std::exception& e) {
+#ifndef NDEBUG
+                return Status::InternalError(
+                        "Failed to clone table filter for file-local rewrite: {}, expr={}",
+                        e.what(), table_filter.conjunct->root()->debug_string());
+#else
+                continue;
+#endif
+            }
+            if (!clone_status.ok()) {
+#ifndef NDEBUG
+                return Status::InternalError(
+                        "Failed to clone table filter for file-local rewrite: {}, expr={}",
+                        clone_status.to_string(), table_filter.conjunct->root()->debug_string());
+#else
+                continue;
+#endif
+            }
+            bool can_localize = true;
+            auto localized_root = rewrite_table_expr_to_file_expr(rewrite_root, global_to_file_slot,
+                                                                  filter_mappings, &rewrite_context,
+                                                                  &can_localize);
+            if (!can_localize) {
+                continue; // the rewrite could not localize this conjunct
+            }
+            auto localized_conjunct = VExprContext::create_shared(std::move(localized_root));
+            RETURN_IF_ERROR(rewrite_context.prepare_created_exprs(localized_conjunct.get()));
+            file_request->conjuncts.push_back(std::move(localized_conjunct));
+        }
+    }
+    if (enable_column_predicate_filters()) {
+        for (const auto& [global_index, predicates] : table_column_predicates) {
+            const auto* mapping = _find_filter_mapping(global_index);
+            const auto entry_it = _filter_entries.find(global_index);
+            if (mapping == nullptr || !mapping->file_local_id.has_value() || predicates.empty() ||
+                entry_it == _filter_entries.end() || !entry_it->second.is_local() ||
+                !column_predicate_can_use_local_source(mapping->filter_conversion) ||
+                mapping->file_type == nullptr) {
+                continue;
+            }
+            FileColumnPredicateFilter column_predicate_filter;
+            column_predicate_filter.file_column_id = LocalColumnId(*mapping->file_local_id);
+            column_predicate_filter.target =
+                    FileNestedPredicateTarget(column_predicate_filter.file_column_id);
+            const auto file_primitive_type =
+                    remove_nullable(mapping->file_type)->get_primitive_type();
+            for (const auto& predicate : predicates) {
+                DORIS_CHECK(predicate != nullptr);
+                if (predicate->primitive_type() == file_primitive_type) {
+                    column_predicate_filter.predicates.push_back(predicate);
+                }
+            }
+            if (column_predicate_filter.predicates.empty()) {
+                continue; // no predicate matches the file column type
+            }
+            file_request->column_predicate_filters.push_back(std::move(column_predicate_filter));
+        }
+        for (const auto& table_filter : table_filters) {
+            if (table_filter.conjunct == nullptr ||
+                !table_filter_has_only_local_entries(table_filter, _filter_entries)) {
+                continue;
+            }
+            std::vector<FileColumnPredicateFilter> nested_column_predicate_filters;
+            collect_nested_column_predicate_filters(table_filter.conjunct->root(), filter_mappings,
+                                                    &nested_column_predicate_filters);
+            for (auto& column_predicate_filter : nested_column_predicate_filters) {
+                merge_column_predicate_filter(std::move(column_predicate_filter),
+                                              &file_request->column_predicate_filters);
+            }
+        }
+    }
+    return Status::OK();
+}
+
+const ColumnDefinition* TableColumnMapper::_find_file_field(
+        const ColumnDefinition& table_column,
+        const std::vector<ColumnDefinition>& file_schema) const {
+    if (!table_column.name.starts_with(BeConsts::GLOBAL_ROWID_COL)) {
+        return matcher_for_mode(_options.mode).find(table_column, file_schema);
+    }
+    // Global row-id columns are located by file column type instead of the mode's matcher.
+    const auto rowid_it = std::ranges::find_if(
+            file_schema, [](const auto& f) { return f.column_type == ColumnType::GLOBAL_ROWID; });
+    return rowid_it == file_schema.end() ? nullptr : &*rowid_it;
+}
+
+// Build the mapping from `table_column` to `file_field`, recursing into nested children.
+Status TableColumnMapper::_create_direct_mapping(const ColumnDefinition& table_column,
+                                                 const ColumnDefinition& file_field,
+                                                 ColumnMapping* mapping) const {
+    DORIS_CHECK(mapping != nullptr);
+    DORIS_CHECK(file_field.local_id >= 0 || file_field.local_id == GLOBAL_ROWID_COLUMN_ID);
+    mapping->file_local_id = file_field.local_id;
+    mapping->table_column_name = table_column.name;
+    mapping->file_column_name = file_field.name;
+    mapping->original_file_type = file_field.type;
+    mapping->original_file_children = file_field.children;
+    mapping->projected_file_children = file_field.children;
+    mapping->file_type = file_field.type;
+    mapping->is_trivial = mapping_can_use_file_column_directly(*mapping);
+    mapping->filter_conversion = mapping->is_trivial ? FilterConversionType::COPY_DIRECTLY
+                                                     : FilterConversionType::CAST_FILTER;
+    mapping->child_mappings.clear();
+
+    auto table_children = table_column.children;
+    const auto nested_table_type = remove_nullable(mapping->table_type);
+    // Some scan paths, especially SELECT *, only carry the complete complex DataType for a table
+    // column and leave ColumnDefinition::children empty. If the file type is an older complex
+    // schema, treating this as a leaf mapping would make TableReader fall back to a plain CAST.
+    // That is invalid for evolved structs with different field counts.
+    // Example:
+    //   table column type: Map(String, Struct(age, full_name, gender))
+    //   old file type:    Map(String, Struct(age, name))
+    //   table children:   empty
+    // Synthesize key/value/struct-field children from the table type so the normal recursive
+    // mapping path can rematerialize `name -> full_name` and fill missing `gender` with defaults,
+    // instead of trying to CAST Struct(age, name) to Struct(age, full_name, gender).
+    const bool synthesized_table_children =
+            table_children.empty() && is_complex_type(nested_table_type->get_primitive_type()) &&
+            !mapping->table_type->equals(*mapping->file_type);
+    if (synthesized_table_children) {
+        table_children = synthesize_complex_children_from_type(mapping->table_type);
+    } else if (!table_children.empty() && !mapping->table_type->equals(*mapping->file_type)) {
+        complete_required_complex_children_from_type(mapping->table_type, &table_children);
+    }
+
+    if (!table_children.empty()) {
+        if (!is_complex_type(remove_nullable(mapping->file_type)->get_primitive_type())) {
+            return Status::NotSupported(
+                    "Cannot map complex table column '{}' to scalar parquet column '{}', table "
+                    "type={}, file type={}",
+                    table_column.name, file_field.name, mapping->table_type->get_name(),
+                    mapping->file_type->get_name());
+        }
+        RETURN_IF_ERROR(validate_file_schema_children(file_field));
+        std::vector<int32_t> synthesized_used_file_child_ids;
+        for (size_t table_child_idx = 0; table_child_idx < table_children.size();
+             ++table_child_idx) {
+            const auto& table_child = table_children[table_child_idx];
+            const auto* file_child =
+                    find_file_child_for_mapping(table_child, file_field, _options.mode,
+                                                table_child_idx, synthesized_table_children);
+            if (synthesized_table_children && file_child != nullptr) {
+                const auto file_child_id = file_child->file_local_id();
+                if (std::ranges::find(synthesized_used_file_child_ids, file_child_id) !=
+                    synthesized_used_file_child_ids.end()) {
+                    file_child = nullptr; // already claimed: take the first unused child
+                    for (const auto& candidate : file_field.children) {
+                        const auto candidate_id = candidate.file_local_id();
+                        if (std::ranges::find(synthesized_used_file_child_ids, candidate_id) ==
+                            synthesized_used_file_child_ids.end()) {
+                            file_child = &candidate;
+                            break;
+                        }
+                    }
+                }
+                if (file_child != nullptr) {
+                    synthesized_used_file_child_ids.push_back(file_child->file_local_id());
+                }
+            }
+            if (file_child == nullptr) { // the table child has no counterpart in the file
+                ColumnMapping child_mapping;
+                child_mapping.table_column_name = table_child.name;
+                child_mapping.file_column_name = table_child.name;
+                child_mapping.table_type = table_child.type;
+                child_mapping.file_type = table_child.type;
+                child_mapping.filter_conversion = FilterConversionType::FINALIZE_ONLY;
+                mapping->child_mappings.push_back(std::move(child_mapping));
+                continue;
+            }
+            ColumnMapping child_mapping;
+            child_mapping.table_column_name = table_child.name;
+            child_mapping.table_type = table_child.type;
+            RETURN_IF_ERROR(_create_direct_mapping(table_child, *file_child, &child_mapping));
+            mapping->child_mappings.push_back(std::move(child_mapping));
+        }
+        if (needs_projected_file_type_rebuild(*mapping)) {
+            // If complex projection prunes some children, rebuild the projected file type so that
+            // the reader expression can find the correct child types by name.
+            RETURN_IF_ERROR(rebuild_projected_file_children_and_type(
+                    mapping->file_type, mapping->original_file_children, mapping->child_mappings,
+                    &mapping->projected_file_children, &mapping->file_type));
+            DCHECK(mapping->table_type != nullptr);
+            mapping->is_trivial = mapping_can_use_file_column_directly(*mapping);
+            // TODO: confirm READER_EXPRESSION is the right conversion for a rebuilt file type.
+            mapping->filter_conversion = mapping->is_trivial
+                                                 ? FilterConversionType::COPY_DIRECTLY
+                                                 : FilterConversionType::READER_EXPRESSION;
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/column_mapper.h b/be/src/format_v2/column_mapper.h
new file mode 100644
index 00000000000000..2ffbbbb9414d83
--- /dev/null
+++ b/be/src/format_v2/column_mapper.h
@@ -0,0 +1,294 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/field.h"
+#include "exprs/vexpr_fwd.h"
+#include "format_v2/file_reader.h"
+
+namespace doris {
+class ColumnPredicate;
+class RuntimeState;
+} // namespace doris
+
+namespace doris::format {
+
+struct ColumnDefinition;
+struct TableFilter;
+
+// Simple table-level predicates, grouped by the table/global output position of the column they
+// reference. The key is a GlobalIndex, not a LocalColumnId: TableColumnMapper resolves it through
+// ColumnMapping before creating file pruning hints.
+using TableColumnPredicates = std::map<GlobalIndex, std::vector<std::shared_ptr<ColumnPredicate>>>;
+
+// Strategy selected through TableColumnMapperOptions for pairing a table column with a file column.
+enum class TableColumnMappingMode {
+    // Match by ColumnDefinition::identifier TYPE_INT as field id.
+    BY_FIELD_ID,
+    // Match by ColumnDefinition::identifier TYPE_STRING, or logical name when identifier is null.
+    BY_NAME,
+    // Match top-level columns by file position, e.g. Hive1 ORC style files whose column names
+    // are placeholders such as `_col0` / `_col1`, so position is the only reliable selector.
+    BY_INDEX,
+};
+
+// Kind of table-level virtual column a ColumnMapping produces; INVALID means a regular column.
+enum TableVirtualColumnType {
+    INVALID = 0, // not a virtual column
+    // Iceberg v3 row lineage metadata column `_row_id`. Physical non-null values
+    // are preserved; NULL or missing values inherit first_row_id + row_position.
+    ROW_ID = 1,
+    // Iceberg v3 row lineage metadata column `_last_updated_sequence_number`. Physical non-null
+    // values are kept; NULL/missing ones inherit the data file's last_updated_sequence_number.
+    LAST_UPDATED_SEQUENCE_NUMBER = 2,
+    // Doris internal Iceberg row locator column `__DORIS_ICEBERG_ROWID_COL__`.
+    // It is a struct used by delete/update/merge, not the Iceberg `_row_id`.
+    ICEBERG_ROWID = 3,
+};
+
+enum class FilterConversionType {
+    COPY_DIRECTLY, // filter can be copied directly from file layer without any change, e.g. column type and table type are the same and no complex nested projection is involved.
+    CAST_FILTER, // filter can be converted from file layer by adding a cast, e.g. column type is nullable but table type is not, or file column has a trivial nested projection but table column has a complex nested projection.
+    READER_EXPRESSION, // NOTE(review): undocumented; presumably the filter is localized through a reader expression when the mapping is not trivial - confirm.
+    FINALIZE_ONLY, // filter cannot be converted to file layer and should be evaluated at table reader finalize phase, e.g. predicates on ICEBERG_ROW_ID column which is generated by IcebergReader.
+    CONSTANT, // NOTE(review): undocumented; presumably for columns produced from a constant instead of physical file data - confirm.
+};
+
+// Nested global-to-local child mapping. The root index points either to a request-local slot or to
+// a child id, depending on the owner. child_mapping keeps the recursive table-child to file-child
+// relationship explicit instead of encoding it in ColumnMapping flags.
+struct IndexMapping {
+    int32_t index = -1; // NOTE(review): -1 looks like an "unassigned" sentinel - confirm
+    std::map<int32_t, std::shared_ptr<IndexMapping>> child_mapping;
+};
+
+// Recursive result of assigning one table/global column to a file-local source; all parts optional.
+struct ColumnMapResult {
+    std::optional<LocalColumnId> local_column_id;
+    std::optional<LocalColumnIndex> column_index;
+    std::optional<IndexMapping> mapping;
+};
+
+// Final mapping entry: one global result column -> one file-local source, with both types.
+struct ColumnMapEntry {
+    IndexMapping mapping;
+    DataTypePtr local_type;
+    DataTypePtr global_type;
+    FilterConversionType filter_conversion = FilterConversionType::FINALIZE_ONLY;
+};
+
+// Final result-column mappings produced for one file/split, keyed by GlobalIndex.
+struct ResultColumnMapping {
+    std::map<GlobalIndex, ColumnMapEntry> global_to_local;
+};
+
+// Mapping result from one table column to one file column.
+// This is the main boundary object between table-level schema semantics and file-local schema
+// semantics.
+struct ColumnMapping {
+    // Position of the top-level projected column in the table/global output block. Table-level
+    // filters and column predicates refer to this index after FileScannerV2 translates FE ids at
+    // the scanner boundary.
+    GlobalIndex global_index;
+    std::string table_column_name;
+    // File-reader local id for the mapped node.
+    // For a root mapping it is convertible to LocalColumnId. For a nested mapping it is the
+    // LocalColumnIndex child id under the parent projection. This is deliberately separated from
+    // ColumnDefinition::identifier, which is the table-to-file matching key such as Parquet/Iceberg
+    // field_id or column name.
+    // Empty means the table column is constant, missing, partition-only, or virtual.
+    std::optional<int32_t> file_local_id;
+    std::string file_column_name;
+    // Full file type/children before nested projection pruning. Used to rebuild projected types
+    // and to localize nested filters that reference children not present in the output projection.
+    DataTypePtr original_file_type;
+    std::vector<ColumnDefinition> original_file_children;
+    // File children after applying the scan projection. The order follows the file-local semantic
+    // schema, not table child order. TableReader uses this to map table-output children back to the
+    // file-local block layout when projection, predicate-only children, and schema evolution mix.
+    std::vector<ColumnDefinition> projected_file_children;
+    // Split/file-local constant entry when this mapping is produced from partition/default/virtual
+    // expression instead of physical file data.
+    std::optional<ConstantIndex> constant_index;
+    // Effective file type after applying casts/remaps/nested projection pruning.
+    DataTypePtr file_type;
+    // Target table/global type after final materialization.
+    DataTypePtr table_type;
+
+    // Final projection expression used to convert file-local values into table/global values, such
+    // as casts, defaults, partition values, generated columns, or complex-column remaps.
+    VExprContextSPtr projection;
+
+    // Mapping tree for nested table children. The order follows table output children, while file
+    // children can be pruned/reordered through each child mapping's file-reader local id.
+    std::vector<ColumnMapping> child_mappings;
+    // True when file value can be used directly as table value without cast or child remap.
+    bool is_trivial = false;
+    // How filters referencing this table/global column can be converted below table-reader
+    // finalize. This is metadata for localize_filters() and future constant-filter evaluation.
+    FilterConversionType filter_conversion = FilterConversionType::FINALIZE_ONLY;
+    // Virtual-column kind; INVALID for regular (non-virtual) columns.
+    TableVirtualColumnType virtual_column_type = TableVirtualColumnType::INVALID;
+    // NOTE(review): presumably the column's default-value expression - confirm against its users.
+    VExprContextSPtr default_expr;
+
+    std::string debug_string() const;
+};
+
+struct TableColumnMapperOptions { // options handed to the TableColumnMapper constructor
+    TableColumnMappingMode mode = TableColumnMappingMode::BY_FIELD_ID; // default: by field id
+
+    std::string debug_string() const;
+};
+
+Status clone_table_expr_tree(const VExprSPtr& expr, VExprSPtr* cloned_expr);
+const Field* find_partition_value(const ColumnDefinition& table_column,
+                                  const std::map<std::string, Field>& partition_values);
+
+// Generic mapping layer from table schema to file schema.
+// Iceberg uses BY_FIELD_ID. Plain by-name formats can reuse this component as well, so keep this
+// abstraction table-format neutral instead of making it Iceberg-only.
+class TableColumnMapper {
+public:
+    explicit TableColumnMapper(TableColumnMapperOptions options = {})
+            : _options(std::move(options)) {}
+    virtual ~TableColumnMapper() = default;
+
+    // Build column mappings from table schema to file schema.
+    // The resulting ColumnMapping describes how each table column is produced from a file column,
+    // a constant, or an expression. Later projection, filter localization, and table-block
+    // finalization should all reuse the same mapping.
+    virtual Status create_mapping(const std::vector<ColumnDefinition>& projected_columns,
+                                  const std::map<std::string, Field>& partition_values,
+                                  const std::vector<ColumnDefinition>& file_schema);
+
+    // Convert a table-level scan request into a file-local scan request. table_filters preserve
+    // row-level filtering semantics and are rewritten as file-local conjuncts. table_column_predicates
+    // are converted only into file-layer pruning hints and do not participate in batch row
+    // filtering.
+    virtual Status create_scan_request(const std::vector<TableFilter>& table_filters,
+                                       const TableColumnPredicates& table_column_predicates,
+                                       const std::vector<ColumnDefinition>& projected_columns,
+                                       FileScanRequest* file_request,
+                                       RuntimeState* runtime_state = nullptr);
+
+    // Localize table-level filters to the file schema.
+    // Trivial mappings can copy structured predicates directly. Type changes may be localized with
+    // a safe cast. Expressions that cannot be pushed down safely should be handled through
+    // reader_expression_map or table-level finalize/filter fallback.
+    virtual Status localize_filters(const std::vector<TableFilter>& table_filters,
+                                    const TableColumnPredicates& table_column_predicates,
+                                    FileScanRequest* file_request,
+                                    RuntimeState* runtime_state = nullptr);
+    void clear() { // Resets every member except _options.
+        _mappings.clear();
+        _hidden_mappings.clear();
+        _constant_map.clear();
+        _filter_entries.clear();
+        _file_schema.clear();
+        _partition_values.clear();
+    }
+    const std::vector<ColumnMapping>& mappings() const { return _mappings; }
+    const std::map<GlobalIndex, FilterEntry>& filter_entries() const { return _filter_entries; }
+    const ConstantMap& constant_map() const { return _constant_map; }
+    std::string debug_string() const;
+
+protected:
+    // Columnar readers such as Parquet can read predicate columns first, evaluate row filters, and
+    // lazily read the rest. Row-oriented readers such as CSV/Text materialize one row at a time and
+    // should keep all required columns in one scan list.
+    virtual bool enable_lazy_materialization() const { return true; }
+    // File-layer column predicate filters are reader-specific pruning hints. Parquet consumes them
+    // for row-group/page-index/statistics pruning; simple delimited readers do not.
+    virtual bool enable_column_predicate_filters() const { return true; }
+    // Row-oriented readers such as CSV/Text cannot physically read only a nested child from one
+    // delimited text field. They must scan the whole complex top-level field and let TableReader
+    // rematerialize the requested table child after row-level filters have run.
+    virtual bool force_full_complex_scan_projection() const { return false; }
+
+    const ColumnDefinition* _find_file_field(
+            const ColumnDefinition& table_column,
+            const std::vector<ColumnDefinition>& file_schema) const;
+    Status _create_direct_mapping(const ColumnDefinition& table_column,
+                                  const ColumnDefinition& file_field, ColumnMapping* mapping) const;
+
+    Status _create_by_index_mapping(const ColumnDefinition& table_column,
+                                    const std::vector<ColumnDefinition>& file_schema,
+                                    ColumnMapping* mapping);
+    Status _build_filter_entries(const FileScanRequest& file_request);
+    Status _build_result_column_mapping(const FileScanRequest& file_request);
+
+    void _set_constant_mapping(ColumnMapping* mapping, VExprContextSPtr expr);
+    Status _create_mapping_for_column(const ColumnDefinition& table_column,
+                                      GlobalIndex global_index, ColumnMapping* mapping);
+    Status _create_hidden_filter_mapping(const ColumnDefinition& table_column,
+                                         GlobalIndex global_index, ColumnMapping* mapping);
+    Status _build_hidden_filter_mappings(const std::vector<TableFilter>& table_filters);
+    std::vector<ColumnMapping> _filter_visible_mappings() const;
+
+    ColumnMapping* _find_mapping(GlobalIndex global_index);
+    ColumnMapping* _find_filter_mapping(GlobalIndex global_index);
+
+    TableColumnMapperOptions _options;
+    // One mapping per projected column, in projected_columns order: how to get the table/global
+    // column from file-local sources, plus metadata for filter localization and result finalize.
+    std::vector<ColumnMapping> _mappings;
+    // Predicate-only top-level columns are not output projection columns, so keep their mappings
+    // here. They are visible only to filter localization and file-reader predicate construction.
+    std::vector<ColumnMapping> _hidden_mappings;
+    // NOTE(review): presumably filled by _build_filter_entries(); keyed by GlobalIndex - confirm.
+    std::map<GlobalIndex, FilterEntry> _filter_entries;
+    ConstantMap _constant_map;
+    // Split-local schema state retained from create_mapping() so create_scan_request() can build
+    // hidden mappings for top-level filter slots that are absent from projected_columns.
+    std::vector<ColumnDefinition> _file_schema;
+    std::map<std::string, Field> _partition_values;
+};
+
+// Mapper for Parquet, which consumes the full FileScanRequest shape: predicate columns for lazy
+// materialization and column_predicate_filters for statistics/page-index pruning. No overrides.
+class ParquetColumnMapper final : public TableColumnMapper {
+public:
+    using TableColumnMapper::TableColumnMapper;
+};
+
+// Mapper for readers that always materialize every required file column before filtering. The
+// schema mapping stays generic, but lazy materialization and column predicate filters are off
+// (predicate_columns / column_predicate_filters unpopulated); complex columns are scanned whole.
+class MaterializedColumnMapper final : public TableColumnMapper {
+public:
+    using TableColumnMapper::TableColumnMapper;
+
+protected:
+    bool enable_lazy_materialization() const override { return false; }
+    bool enable_column_predicate_filters() const override { return false; }
+    bool force_full_complex_scan_projection() const override { return true; }
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/column_mapper_nested.cpp b/be/src/format_v2/column_mapper_nested.cpp
new file mode 100644
index 00000000000000..0e3539242fff26
--- /dev/null
+++ b/be/src/format_v2/column_mapper_nested.cpp
@@ -0,0 +1,1050 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/column_mapper_nested.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/exception.h"
+#include "core/assert_cast.h"
+#include "core/data_type/convert_field_to_type.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/primitive_type.h"
+#include "exprs/create_predicate_function.h"
+#include "exprs/vexpr.h"
+#include "exprs/vin_predicate.h"
+#include "format_v2/expr/cast.h"
+#include "gen_cpp/Exprs_types.h"
+#include "storage/predicate/null_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris::format {
+
+namespace {
+
+static bool is_cast_expr(const VExprSPtr& expr) { // true iff expr points to a Cast node
+    return dynamic_cast<const Cast*>(expr.get()) != nullptr; // a null expr yields false
+}
+
+// True for a two-child BINARY_PRED / NULL_AWARE_BINARY_PRED node whose opcode is one of
+// EQ, EQ_FOR_NULL, NE, GE, GT, LE or LT. A null expr is rejected.
+static bool is_binary_comparison_predicate(const VExprSPtr& expr) {
+    if (expr == nullptr) {
+        return false;
+    }
+    if (expr->get_num_children() != 2) {
+        return false;
+    }
+    const auto node_kind = expr->node_type();
+    if (node_kind != TExprNodeType::BINARY_PRED &&
+        node_kind != TExprNodeType::NULL_AWARE_BINARY_PRED) {
+        return false;
+    }
+    const auto opcode = expr->op();
+    return opcode == TExprOpcode::EQ || opcode == TExprOpcode::EQ_FOR_NULL ||
+           opcode == TExprOpcode::NE || opcode == TExprOpcode::GE || opcode == TExprOpcode::GT ||
+           opcode == TExprOpcode::LE || opcode == TExprOpcode::LT;
+}
+
+// Matches a one-child FUNCTION_CALL named "is_null_pred" / "is_not_null_pred"; sets *is_null.
+static bool is_null_predicate_function(const VExprSPtr& expr, bool* is_null) {
+    DORIS_CHECK(is_null != nullptr);
+    const bool unary_call = expr != nullptr && expr->node_type() == TExprNodeType::FUNCTION_CALL &&
+                            expr->get_num_children() == 1;
+    if (!unary_call) {
+        return false;
+    }
+    const auto& function_name = expr->fn().name.function_name;
+    const bool wants_null = function_name == "is_null_pred";
+    if (!wants_null && function_name != "is_not_null_pred") {
+        return false; // *is_null is left untouched for any other function
+    }
+    *is_null = wants_null;
+    return true;
+}
+
+// Reports whether `type` is TINYINT, SMALLINT, INT, BIGINT or LARGEINT.
+static bool is_signed_integer_type(PrimitiveType type) {
+    constexpr PrimitiveType kSignedIntegerTypes[] = {
+            TYPE_TINYINT,
+            TYPE_SMALLINT,
+            TYPE_INT,
+            TYPE_BIGINT,
+            TYPE_LARGEINT,
+    };
+    return std::ranges::any_of(kSignedIntegerTypes,
+                               [type](PrimitiveType candidate) { return candidate == type; });
+}
+
+// Width of a signed integer primitive: 1, 2, 4, 8, 16 for TINYINT..LARGEINT; 0 for other types.
+static int primitive_integer_width(PrimitiveType type) {
+    if (type == TYPE_TINYINT) {
+        return 1;
+    }
+    if (type == TYPE_SMALLINT) {
+        return 2;
+    }
+    if (type == TYPE_INT) {
+        return 4;
+    }
+    if (type == TYPE_BIGINT) {
+        return 8;
+    }
+    return type == TYPE_LARGEINT ? 16 : 0;
+}
+
+// Reports whether `type` is DECIMAL32, DECIMAL64, DECIMALV2, DECIMAL128I or DECIMAL256.
+static bool is_decimal_type(PrimitiveType type) {
+    constexpr PrimitiveType kDecimalTypes[] = {
+            TYPE_DECIMAL32,
+            TYPE_DECIMAL64,
+            TYPE_DECIMALV2,
+            TYPE_DECIMAL128I,
+            TYPE_DECIMAL256,
+    };
+    return std::ranges::any_of(kDecimalTypes,
+                               [type](PrimitiveType candidate) { return candidate == type; });
+}
+
+// True when casting from_type to to_type (nullability ignored) keeps value ordering: same type,
+// widening signed integer, FLOAT -> DOUBLE, or same-scale decimal whose precision does not shrink.
+static bool is_order_preserving_safe_cast(const DataTypePtr& from_type,
+                                          const DataTypePtr& to_type) {
+    if (from_type == nullptr || to_type == nullptr) {
+        return false;
+    }
+    const auto source = remove_nullable(from_type);
+    const auto target = remove_nullable(to_type);
+    if (source->equals(*target)) {
+        return true;
+    }
+    const auto source_kind = source->get_primitive_type();
+    const auto target_kind = target->get_primitive_type();
+    const bool both_signed_integers =
+            is_signed_integer_type(source_kind) && is_signed_integer_type(target_kind);
+    if (both_signed_integers) {
+        return primitive_integer_width(source_kind) <= primitive_integer_width(target_kind);
+    }
+    if (source_kind == TYPE_FLOAT && target_kind == TYPE_DOUBLE) {
+        return true;
+    }
+    return is_decimal_type(source_kind) && is_decimal_type(target_kind) &&
+           source->get_scale() == target->get_scale() &&
+           source->get_precision() <= target->get_precision();
+}
+
+// Converts the literal selector argument of a struct element access into `selector`.
+// STRING/CHAR/VARCHAR literals select by name. BOOLEAN, TINYINT, SMALLINT, INT and BIGINT
+// literals select by 1-based ordinal and are rejected unless positive (a BOOLEAN therefore only
+// works when true, as ordinal 1). Null or non-literal expressions and other types return false.
+// Note that *selector may already be partially written when false is returned.
+static bool parse_struct_child_selector(const VExprSPtr& expr, StructChildSelector* selector) {
+    DORIS_CHECK(selector != nullptr);
+    if (expr == nullptr || !expr->is_literal()) {
+        return false;
+    }
+    const Field literal = literal_field(expr);
+    // Common tail of the integer cases: switch to ordinal mode, accept only positive values.
+    const auto select_ordinal = [selector](auto value) {
+        selector->by_name = false;
+        if (value <= 0) {
+            return false;
+        }
+        selector->ordinal = cast_set<size_t>(value);
+        return true;
+    };
+    switch (literal.get_type()) {
+    // Name-based selection.
+    case TYPE_STRING:
+    case TYPE_CHAR:
+    case TYPE_VARCHAR:
+        selector->by_name = true;
+        selector->name = std::string(literal.as_string_view());
+        return true;
+    // true acts as ordinal 1; false stores ordinal 0 and is rejected.
+    case TYPE_BOOLEAN: {
+        const bool flag = literal.get<TYPE_BOOLEAN>();
+        selector->by_name = false;
+        selector->ordinal = flag ? 1 : 0;
+        return flag;
+    }
+    // Ordinal-based selection for the supported integer widths.
+    case TYPE_TINYINT:
+        return select_ordinal(literal.get<TYPE_TINYINT>());
+    case TYPE_SMALLINT:
+        return select_ordinal(literal.get<TYPE_SMALLINT>());
+    case TYPE_INT:
+        return select_ordinal(literal.get<TYPE_INT>());
+    case TYPE_BIGINT:
+        return select_ordinal(literal.get<TYPE_BIGINT>());
+    // Every other literal type cannot address a struct child.
+    default:
+        return false;
+    }
+}
+
+// Flattens nested struct element accesses rooted at a slot reference, e.g.
+// element_at(element_at(s, 'a'), 'b') or element_at(element_at(s, 1), 1), into `path` with the
+// selector nearest the slot first. Returns false and leaves *path untouched on any other shape.
+static bool extract_nested_struct_path(const VExprSPtr& expr, NestedStructPath* path) {
+    DORIS_CHECK(path != nullptr);
+    // Walk from the outermost access towards the slot; selectors are therefore found in reverse.
+    decltype(path->selectors) outermost_first;
+    VExprSPtr node = expr;
+    while (true) {
+        if (!is_struct_element_expr(node)) {
+            return false;
+        }
+        StructChildSelector selector;
+        if (!parse_struct_child_selector(node->children()[1], &selector)) {
+            return false;
+        }
+        outermost_first.push_back(std::move(selector));
+        const VExprSPtr parent = node->children()[0];
+        if (parent->is_slot_ref()) {
+            const auto* slot_ref = assert_cast<const VSlotRef*>(parent.get());
+            path->root_global_index = slot_ref_global_index(*slot_ref);
+            break;
+        }
+        node = parent;
+    }
+    std::ranges::reverse(outermost_first);
+    path->selectors = std::move(outermost_first);
+    return true;
+}
+
+// Variant of extract_nested_struct_path() that also looks through single-child Cast wrappers,
+// as long as each cast is order preserving. Such a widening cast is null-preserving and keeps
+// the ordering of the nested primitive leaf, so file-layer pruning can target the original leaf
+// statistics; the row-level filter still evaluates the original cast expression after read.
+static bool extract_nested_struct_path_for_pruning(const VExprSPtr& expr, NestedStructPath* path) {
+    DORIS_CHECK(path != nullptr);
+    VExprSPtr node = expr;
+    // Peel one cast per iteration until a plain struct element chain is found.
+    while (!extract_nested_struct_path(node, path)) {
+        if (!is_cast_expr(node) || node->get_num_children() != 1) {
+            return false;
+        }
+        const VExprSPtr inner = node->children()[0];
+        if (!is_order_preserving_safe_cast(inner->data_type(), node->data_type())) {
+            return false;
+        }
+        node = inner;
+    }
+    return true;
+}
+
+static const ColumnDefinition* resolve_file_child(const std::vector<ColumnDefinition>& children,
+                                                  const StructChildSelector& selector) {
+    if (!selector.by_name) { // 1-based ordinal: 0 or past-the-end selects nothing
+        const bool in_range = selector.ordinal != 0 && selector.ordinal <= children.size();
+        return in_range ? &children[selector.ordinal - 1] : nullptr;
+    }
+    for (const auto& child : children) { // by name: the first exact match wins
+        if (child.name == selector.name) {
+            return &child;
+        }
+    }
+    return nullptr;
+}
+
+// Strips nullability and returns the DataTypeStruct behind `type`; nullptr when `type` is null
+// or its nested type is not TYPE_STRUCT.
+static const DataTypeStruct* struct_type_or_null(const DataTypePtr& type) {
+    if (type == nullptr) {
+        return nullptr;
+    }
+    const auto unwrapped = remove_nullable(type);
+    const bool is_struct = unwrapped->get_primitive_type() == TYPE_STRUCT;
+    return is_struct ? assert_cast<const DataTypeStruct*>(unwrapped.get()) : nullptr;
+}
+
+// 0-based position of the child `selector` addresses in mapping.table_type. std::nullopt when
+// that type is not a struct, the name is unknown, or the 1-based ordinal is 0 / out of range.
+static std::optional<int32_t> struct_child_index(const ColumnMapping& mapping,
+                                                 const StructChildSelector& selector) {
+    const auto* table_struct = struct_type_or_null(mapping.table_type);
+    if (table_struct == nullptr) {
+        return std::nullopt;
+    }
+    if (!selector.by_name) {
+        if (selector.ordinal == 0 || selector.ordinal > table_struct->get_elements().size()) {
+            return std::nullopt;
+        }
+        return cast_set<int32_t>(selector.ordinal - 1);
+    }
+    const auto named_position = table_struct->try_get_position_by_name(selector.name);
+    return named_position.has_value() ? std::make_optional(cast_set<int32_t>(*named_position))
+                                      : std::nullopt;
+}
+
+// Table-side child index of `child_mapping` under `mapping`. A struct parent resolves it by the
+// child's table column name (enforced by DORIS_CHECK); otherwise fallback_child_idx is used.
+static int32_t child_mapping_global_index(const ColumnMapping& mapping,
+                                          const ColumnMapping& child_mapping,
+                                          size_t fallback_child_idx) {
+    if (const auto* table_struct = struct_type_or_null(mapping.table_type)) {
+        const auto& child_name = child_mapping.table_column_name;
+        const auto position = table_struct->try_get_position_by_name(child_name);
+        DORIS_CHECK(position.has_value()) << "Cannot find child '" << child_name
+                                          << "' in table type " << mapping.table_type->get_name();
+        return cast_set<int32_t>(*position);
+    }
+    return cast_set<int32_t>(fallback_child_idx);
+}
+
+static const ColumnMapping* resolve_mapped_child(const ColumnMapping& mapping,
+                                                 int32_t global_child_index) {
+    size_t position = 0; // doubles as the fallback index for non-struct parents
+    for (const auto& candidate : mapping.child_mappings) {
+        if (child_mapping_global_index(mapping, candidate, position++) == global_child_index) {
+            return &candidate; // first match wins
+        }
+    }
+    return nullptr; // no child mapping represents this table-side child
+}
+
+enum class NestedProjectionResolveResult {
+    RESOLVED,           // a file-local projection was built for the whole path
+    NOT_REPRESENTED,    // the ColumnMapping tree does not cover the path
+    MISSING_FILE_CHILD, // a child mapping on the path has no file-local id
+};
+
+// Translates a table-side nested struct path into a file-local projection by walking the existing
+// ColumnMapping tree. Example: table column `s` has children `{a, renamed_b}` while file column
+// `s` has `{a, b}`; `struct_element(s, 'renamed_b')` then resolves to the file projection
+// `s -> b`, because the child mapping is followed instead of matching the table child name
+// against the file schema. NOT_REPRESENTED means the mapping tree does not cover the path.
+// MISSING_FILE_CHILD means ColumnMapping explicitly records the table child as absent from this
+// file; callers must not fall back to schema-name lookup then, because Iceberg can drop a field
+// and later add a different field with the same name. *root_projection is {} unless RESOLVED.
+static NestedProjectionResolveResult resolve_nested_projection_with_mapping(
+        const NestedStructPath& path, const std::vector<ColumnMapping>& mappings,
+        LocalColumnIndex* root_projection) {
+    DORIS_CHECK(root_projection != nullptr);
+    *root_projection = {};
+    if (path.selectors.empty()) {
+        return NestedProjectionResolveResult::NOT_REPRESENTED;
+    }
+    const ColumnMapping* level_mapping = nullptr;
+    for (const auto& candidate : mappings) {
+        if (candidate.global_index == path.root_global_index) {
+            level_mapping = &candidate;
+            break;
+        }
+    }
+    if (level_mapping == nullptr || !level_mapping->file_local_id.has_value()) {
+        return NestedProjectionResolveResult::NOT_REPRESENTED;
+    }
+
+    // Every failure exit inside the loop must hand back an empty projection.
+    const auto fail = [root_projection](NestedProjectionResolveResult reason) {
+        *root_projection = {};
+        return reason;
+    };
+    *root_projection = LocalColumnIndex::partial_local(*level_mapping->file_local_id);
+    LocalColumnIndex* level_projection = root_projection;
+    const size_t depth = path.selectors.size();
+    for (size_t level = 0; level < depth; ++level) {
+        const auto table_child = struct_child_index(*level_mapping, path.selectors[level]);
+        if (!table_child.has_value()) {
+            return fail(NestedProjectionResolveResult::NOT_REPRESENTED);
+        }
+        const auto* child_mapping = resolve_mapped_child(*level_mapping, *table_child);
+        if (child_mapping == nullptr) {
+            return fail(NestedProjectionResolveResult::NOT_REPRESENTED);
+        }
+        if (!child_mapping->file_local_id.has_value()) {
+            return fail(NestedProjectionResolveResult::MISSING_FILE_CHILD);
+        }
+        // Only the node of the last selector projects all of its children.
+        auto& appended = level_projection->children.emplace_back(
+                LocalColumnIndex::partial_local(*child_mapping->file_local_id));
+        appended.project_all_children = level + 1 == depth;
+        level_projection = &appended;
+        level_mapping = child_mapping;
+    }
+    return NestedProjectionResolveResult::RESOLVED;
+}
+
+static bool table_root_is_struct(const ColumnMapping& mapping) { // nullable wrapper is ignored
+    return struct_type_or_null(mapping.table_type) != nullptr; // null table_type yields false
+}
+
+static const std::vector<ColumnDefinition>& scan_file_children(const ColumnMapping& mapping) {
+    const auto& projected = mapping.projected_file_children; // empty: use the original children
+    return projected.empty() ? mapping.original_file_children : projected;
+}
+
+// Follows a single-branch projection chain by file_local_id; nullptr on a miss or a fan-out.
+static const ColumnDefinition* resolve_file_leaf_from_projection(
+        const std::vector<ColumnDefinition>& children, const LocalColumnIndex& projection) {
+    for (const auto& child : children) {
+        if (child.file_local_id() != projection.local_id()) {
+            continue;
+        }
+        const auto& nested = projection.children;
+        if (nested.empty()) {
+            return &child;
+        }
+        return nested.size() == 1 ? resolve_file_leaf_from_projection(child.children, nested[0])
+                                  : nullptr;
+    }
+    return nullptr;
+}
+
+// Appends the name and type of each file child along a single-branch projection chain, matched
+// by file_local_id. Returns false on a miss or a fan-out; earlier appends stay in the outputs.
+static bool collect_file_child_names_from_projection(const std::vector<ColumnDefinition>& children,
+                                                     const LocalColumnIndex& projection,
+                                                     std::vector<std::string>* file_child_names,
+                                                     std::vector<DataTypePtr>* file_child_types) {
+    DORIS_CHECK(file_child_names != nullptr);
+    DORIS_CHECK(file_child_types != nullptr);
+    for (const auto& child : children) {
+        if (child.file_local_id() != projection.local_id()) {
+            continue;
+        }
+        file_child_names->push_back(child.name);
+        file_child_types->push_back(child.type);
+        if (projection.children.empty()) {
+            return true;
+        }
+        return projection.children.size() == 1 &&
+               collect_file_child_names_from_projection(child.children, projection.children[0],
+                                                        file_child_names, file_child_types);
+    }
+    return false;
+}
+
+struct NestedPredicateTarget {
+    LocalColumnIndex file_projection;
+    FileNestedPredicateTarget file_target;
+    std::string leaf_name;
+    DataTypePtr leaf_type;
+};
+
+static std::unique_ptr<FileStructPredicateTarget> build_struct_predicate_target_from_projection(
+        const std::vector<ColumnDefinition>& children, const LocalColumnIndex& projection) {
+    const auto child_it = std::ranges::find_if(children, [&](const ColumnDefinition& child) {
+        return child.file_local_id() == projection.local_id();
+    });
+    if (child_it == children.end()) {
+        return nullptr; // projected id is missing from these file children
+    }
+    std::unique_ptr<FileStructPredicateTarget> nested_child; // stays null at the leaf level
+    if (!projection.children.empty()) {
+        if (projection.children.size() != 1) {
+            return nullptr; // only single-path projections are supported
+        }
+        nested_child = build_struct_predicate_target_from_projection(child_it->children,
+                                                                     projection.children[0]);
+        if (nested_child == nullptr) {
+            return nullptr; // a deeper level failed to resolve
+        }
+    }
+    return std::make_unique<FileStructPredicateTarget>(child_it->file_local_id(), child_it->name,
+                                                       std::move(nested_child));
+}
+
+static bool build_struct_predicate_target(const ColumnMapping& root_mapping,
+                                          const LocalColumnIndex& root_projection,
+                                          FileNestedPredicateTarget* file_target) {
+    DORIS_CHECK(file_target != nullptr);
+    if (!root_projection.column_id().is_valid() || root_projection.children.size() != 1) {
+        return false; // needs a valid root id and exactly one child path
+    }
+    auto struct_target = build_struct_predicate_target_from_projection(
+            root_mapping.original_file_children, root_projection.children[0]);
+    if (struct_target == nullptr) {
+        return false; // child path not found; *file_target is left unchanged
+    }
+    *file_target = FileNestedPredicateTarget(root_projection.column_id(), std::move(struct_target));
+    return true;
+}
+
+// Resolves `path` to its file-side leaf and fills `target` with the leaf name, the leaf's
+// non-nullable type, the file projection and the struct predicate target. Returns false if any
+// step fails; `target` may then be partially written and must not be used.
+static bool resolve_nested_predicate_target(const NestedStructPath& path,
+                                            const std::vector<ColumnMapping>& mappings,
+                                            NestedPredicateTarget* target) {
+    DORIS_CHECK(target != nullptr);
+    ResolvedNestedStructPath file_path;
+    if (!resolve_nested_struct_path_for_file(path, mappings, &file_path)) {
+        return false;
+    }
+
+    const auto root_it = std::ranges::find_if(mappings, [&](const ColumnMapping& candidate) {
+        return candidate.global_index == path.root_global_index;
+    });
+    if (root_it == mappings.end() || file_path.file_projection.children.size() != 1) {
+        return false;
+    }
+    // The leaf is looked up in the original file children rather than the projected ones.
+    const auto* leaf = resolve_file_leaf_from_projection(root_it->original_file_children,
+                                                         file_path.file_projection.children[0]);
+    if (leaf == nullptr || leaf->type == nullptr) {
+        return false;
+    }
+    target->leaf_type = remove_nullable(leaf->type);
+    if (is_complex_type(target->leaf_type->get_primitive_type())) {
+        return false; // complex-typed leaves are rejected
+    }
+    target->leaf_name = leaf->name;
+    target->file_projection = std::move(file_path.file_projection);
+    return build_struct_predicate_target(*root_it, target->file_projection, &target->file_target);
+}
+
+static VExprSPtr original_table_literal_for_nested_predicate(const VExprSPtr& literal_expr) {
+    DORIS_CHECK(literal_expr != nullptr);
+    DORIS_CHECK(literal_expr->is_literal());
+    const auto* rewritten_literal = dynamic_cast<const SplitLocalFileLiteral*>(literal_expr.get());
+    if (rewritten_literal == nullptr) {
+        return literal_expr; // not a SplitLocalFileLiteral: use the literal as is
+    }
+    return VLiteral::create_shared(rewritten_literal->original_type(),
+                                   rewritten_literal->original_field()); // pre-rewrite value
+}
+
+static std::optional<PredicateType> to_column_predicate_type(TExprOpcode::type opcode) {
+    switch (opcode) {
+    case TExprOpcode::EQ:
+        return PredicateType::EQ;
+    case TExprOpcode::NE:
+        return PredicateType::NE;
+    case TExprOpcode::GT:
+        return PredicateType::GT;
+    case TExprOpcode::GE:
+        return PredicateType::GE;
+    case TExprOpcode::LT:
+        return PredicateType::LT;
+    case TExprOpcode::LE:
+        return PredicateType::LE;
+    default:
+        return std::nullopt;
+    }
+}
+
+static TExprOpcode::type reverse_comparison_opcode(TExprOpcode::type opcode) {
+    switch (opcode) { // mirrors ordering ops for swapped operands (a < b <=> b > a)
+    case TExprOpcode::GT:
+        return TExprOpcode::LT;
+    case TExprOpcode::GE:
+        return TExprOpcode::LE;
+    case TExprOpcode::LT:
+        return TExprOpcode::GT;
+    case TExprOpcode::LE:
+        return TExprOpcode::GE;
+    default:
+        return opcode; // all other opcodes are returned unchanged
+    }
+}
+
+static std::shared_ptr<ColumnPredicate> create_comparison_column_predicate(
+        PredicateType predicate_type, uint32_t column_id, const std::string& column_name,
+        const DataTypePtr& data_type, const Field& value) {
+    switch (predicate_type) {
+    case PredicateType::EQ:
+        return create_comparison_predicate<PredicateType::EQ>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::NE:
+        return create_comparison_predicate<PredicateType::NE>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::GT:
+        return create_comparison_predicate<PredicateType::GT>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::GE:
+        return create_comparison_predicate<PredicateType::GE>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::LT:
+        return create_comparison_predicate<PredicateType::LT>(column_id, column_name, data_type,
+                                                              value, false);
+    case PredicateType::LE:
+        return create_comparison_predicate<PredicateType::LE>(column_id, column_name, data_type,
+                                                              value, false);
+    default:
+        return nullptr;
+    }
+}
+
+static bool extract_child_id_path_from_projection(const LocalColumnIndex& root_projection,
+                                                  std::vector<int32_t>* file_child_id_path) {
+    DORIS_CHECK(file_child_id_path != nullptr);
+    file_child_id_path->clear(); // output lists child ids only, never the root id
+    const auto* current_projection = &root_projection;
+    while (!current_projection->children.empty()) {
+        if (current_projection->children.size() != 1) {
+            file_child_id_path->clear(); // branching: do not leave a partial path behind
+            return false;
+        }
+        current_projection = &current_projection->children[0];
+        file_child_id_path->push_back(current_projection->local_id());
+    }
+    return !file_child_id_path->empty(); // false for a projection without children
+}
+
+static std::shared_ptr<ColumnPredicate> build_nested_comparison_predicate(
+        const VExprSPtr& literal_expr, TExprOpcode::type opcode, LocalColumnId root_file_column_id,
+        const std::string& leaf_name, const DataTypePtr& file_leaf_type) {
+    if (literal_expr == nullptr || !literal_expr->is_literal() || file_leaf_type == nullptr) {
+        return nullptr; // nothing to build a predicate from
+    }
+    const auto predicate_type = to_column_predicate_type(opcode);
+    if (!predicate_type.has_value()) {
+        return nullptr; // opcode is not EQ/NE/GT/GE/LT/LE
+    }
+    const auto original_literal = original_table_literal_for_nested_predicate(literal_expr);
+    const Field original_field = literal_field(original_literal);
+    Field file_field;
+    try {
+        convert_field_to_type(original_field, *file_leaf_type, &file_field,
+                              original_literal->data_type().get());
+    } catch (const Exception&) {
+        return nullptr; // literal is not convertible to the file leaf type
+    }
+    if (file_field.is_null()) {
+        return nullptr; // conversion produced NULL: build no predicate
+    }
+    try {
+        return create_comparison_column_predicate(*predicate_type,
+                                                  cast_set<uint32_t>(root_file_column_id.value()),
+                                                  leaf_name, file_leaf_type, file_field);
+    } catch (const Exception&) {
+        return nullptr; // predicate creation threw: build no predicate
+    }
+}
+
+static std::shared_ptr<ColumnPredicate> build_nested_in_list_predicate(
+        const VExprSPtrs& literal_exprs, LocalColumnId root_file_column_id,
+        const std::string& leaf_name, const DataTypePtr& file_leaf_type) {
+    if (literal_exprs.empty() || file_leaf_type == nullptr) {
+        return nullptr; // nothing to build a predicate from
+    }
+
+    auto value_column = file_leaf_type->create_column(); // collects the converted values
+    for (const auto& literal_expr : literal_exprs) {
+        if (literal_expr == nullptr || !literal_expr->is_literal()) {
+            return nullptr; // every IN element must be a literal
+        }
+        const auto original_literal = original_table_literal_for_nested_predicate(literal_expr);
+        const Field original_field = literal_field(original_literal);
+        Field file_field;
+        try {
+            convert_field_to_type(original_field, *file_leaf_type, &file_field,
+                                  original_literal->data_type().get());
+        } catch (const Exception&) {
+            return nullptr; // one unconvertible literal cancels the whole filter
+        }
+        if (file_field.is_null()) {
+            return nullptr; // so does a literal that converts to NULL
+        }
+        value_column->insert(file_field);
+    }
+
+    std::shared_ptr<HybridSetBase> values;
+    try {
+        values.reset(create_set(file_leaf_type->get_primitive_type(), literal_exprs.size(), false));
+        ColumnPtr value_column_ptr = std::move(value_column);
+        values->insert_range_from(value_column_ptr, 0, value_column_ptr->size());
+        return create_in_list_predicate<PredicateType::IN_LIST>(
+                cast_set<uint32_t>(root_file_column_id.value()), leaf_name, file_leaf_type, values,
+                false);
+    } catch (const Exception&) {
+        return nullptr; // set or predicate creation threw
+    }
+}
+
+static std::shared_ptr<ColumnPredicate> build_nested_null_predicate(
+        bool is_null, LocalColumnId root_file_column_id, const std::string& leaf_name,
+        const DataTypePtr& file_leaf_type) {
+    if (file_leaf_type == nullptr) {
+        return nullptr;
+    }
+    const auto leaf_primitive_type = remove_nullable(file_leaf_type)->get_primitive_type();
+    return NullPredicate::create_shared(cast_set<uint32_t>(root_file_column_id.value()), leaf_name,
+                                        is_null, leaf_primitive_type);
+}
+
+static bool set_nested_column_filter_target(const NestedPredicateTarget& target,
+                                            FileColumnPredicateFilter* column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    std::vector<int32_t> file_child_id_path;
+    if (!extract_child_id_path_from_projection(target.file_projection, &file_child_id_path)) {
+        return false;
+    }
+    column_filter->file_column_id = target.file_projection.column_id();
+    column_filter->file_child_id_path = std::move(file_child_id_path);
+    column_filter->target = target.file_target;
+    return true;
+}
+
+static bool extract_nested_binary_comparison_filter(const VExprSPtr& expr,
+                                                    const std::vector<ColumnMapping>& mappings,
+                                                    FileColumnPredicateFilter* column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    if (!is_binary_comparison_predicate(expr)) {
+        return false;
+    }
+    NestedStructPath path;
+    VExprSPtr literal_expr;
+    TExprOpcode::type opcode = expr->op();
+    if (extract_nested_struct_path_for_pruning(expr->children()[0], &path) &&
+        expr->children()[1]->is_literal()) {
+        literal_expr = expr->children()[1];
+    } else if (extract_nested_struct_path_for_pruning(expr->children()[1], &path) &&
+               expr->children()[0]->is_literal()) {
+        literal_expr = expr->children()[0];
+        opcode = reverse_comparison_opcode(opcode); // literal on the left: mirror the operator
+    } else {
+        return false; // not <struct path> <op> <literal> in either order
+    }
+
+    NestedPredicateTarget target;
+    if (!resolve_nested_predicate_target(path, mappings, &target)) {
+        return false;
+    }
+    auto predicate = build_nested_comparison_predicate(literal_expr, opcode,
+                                                       target.file_projection.column_id(),
+                                                       target.leaf_name, target.leaf_type);
+    if (predicate == nullptr) {
+        return false; // column_filter is still untouched here
+    }
+    if (!set_nested_column_filter_target(target, column_filter)) {
+        return false;
+    }
+    column_filter->predicates.push_back(std::move(predicate));
+    return true;
+}
+
+static bool extract_nested_in_list_filter(const VExprSPtr& expr,
+                                          const std::vector<ColumnMapping>& mappings,
+                                          FileColumnPredicateFilter* column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    if (expr == nullptr || expr->node_type() != TExprNodeType::IN_PRED ||
+        expr->get_num_children() < 2) {
+        return false; // not an IN predicate with at least one list element
+    }
+    if (const auto* in_predicate = dynamic_cast<const VInPredicate*>(expr.get());
+        in_predicate == nullptr || in_predicate->is_not_in()) {
+        return false; // NOT IN, or negation unknowable: never prune it as a positive IN
+    }
+
+    NestedStructPath path;
+    if (!extract_nested_struct_path_for_pruning(expr->children()[0], &path)) {
+        return false; // first child is not a nested struct path
+    }
+
+    VExprSPtrs literal_exprs;
+    literal_exprs.reserve(expr->get_num_children() - 1);
+    for (size_t child_idx = 1; child_idx < expr->children().size(); ++child_idx) {
+        if (!expr->children()[child_idx]->is_literal()) {
+            return false; // every list element must be a literal
+        }
+        literal_exprs.push_back(expr->children()[child_idx]);
+    }
+
+    NestedPredicateTarget target;
+    if (!resolve_nested_predicate_target(path, mappings, &target)) {
+        return false;
+    }
+    auto predicate = build_nested_in_list_predicate(
+            literal_exprs, target.file_projection.column_id(), target.leaf_name, target.leaf_type);
+    if (predicate == nullptr) {
+        return false; // column_filter is still untouched here
+    }
+    if (!set_nested_column_filter_target(target, column_filter)) {
+        return false;
+    }
+    column_filter->predicates.push_back(std::move(predicate));
+    return true;
+}
+
+static bool extract_nested_null_filter(const VExprSPtr& expr,
+                                       const std::vector<ColumnMapping>& mappings,
+                                       FileColumnPredicateFilter* column_filter) {
+    DORIS_CHECK(column_filter != nullptr);
+    bool is_null = false;
+    if (!is_null_predicate_function(expr, &is_null)) {
+        return false;
+    }
+
+    NestedStructPath path;
+    if (!extract_nested_struct_path_for_pruning(expr->children()[0], &path)) {
+        return false;
+    }
+
+    NestedPredicateTarget target;
+    if (!resolve_nested_predicate_target(path, mappings, &target)) {
+        return false;
+    }
+    auto predicate = build_nested_null_predicate(is_null, target.file_projection.column_id(),
+                                                 target.leaf_name, target.leaf_type);
+    if (predicate == nullptr) {
+        return false;
+    }
+    if (!set_nested_column_filter_target(target, column_filter)) {
+        return false;
+    }
+    column_filter->predicates.push_back(std::move(predicate));
+    return true;
+}
+
+} // namespace
+
+SplitLocalFileLiteral::SplitLocalFileLiteral(const DataTypePtr& file_type, const Field& file_field,
+                                             DataTypePtr original_type, Field original_field)
+        : VLiteral(file_type, file_field),
+          _original_type(std::move(original_type)),
+          _original_field(std::move(original_field)) {}
+
+GlobalIndex slot_ref_global_index(const VSlotRef& slot_ref) {
+    DORIS_CHECK(slot_ref.column_id() >= 0);
+    return GlobalIndex(cast_set<size_t>(slot_ref.column_id()));
+}
+
+bool is_struct_element_expr(const VExprSPtr& expr) {
+    if (expr == nullptr || expr->get_num_children() != 2) {
+        return false; // a struct element access has exactly two children
+    }
+    const auto& function_name = expr->fn().name.function_name;
+    if (function_name == "struct_element") {
+        return true;
+    }
+    if (function_name != "element_at") { // element_at only counts when applied to a struct
+        return false;
+    }
+    const auto& parent_type = expr->children()[0]->data_type();
+    return parent_type != nullptr &&
+           remove_nullable(parent_type)->get_primitive_type() == TYPE_STRUCT;
+}
+
+Field literal_field(const VExprSPtr& literal_expr) {
+    DORIS_CHECK(literal_expr != nullptr);
+    DORIS_CHECK(literal_expr->is_literal());
+    const auto* literal = dynamic_cast<const VLiteral*>(literal_expr.get());
+    DORIS_CHECK(literal != nullptr); // is_literal() alone does not prove the VLiteral type
+    Field field;
+    literal->get_column_ptr()->get(0, field); // copy row 0 of the literal column into `field`
+    return field;
+}
+
+bool resolve_nested_struct_path_for_file(const NestedStructPath& path,
+                                         const std::vector<ColumnMapping>& mappings,
+                                         ResolvedNestedStructPath* resolved,
+                                         bool require_scan_projection) {
+    DORIS_CHECK(resolved != nullptr);
+    *resolved = {};
+    const auto mapping_it = std::ranges::find_if(mappings, [&](const ColumnMapping& mapping) {
+        return mapping.global_index == path.root_global_index;
+    });
+    if (mapping_it == mappings.end() || !mapping_it->file_local_id.has_value() ||
+        path.selectors.empty()) {
+        return false;
+    }
+
+    // Prefer ColumnMapping over schema-name lookup. This is the only path that can correctly
+    // localize renamed Iceberg fields: a table filter `element_at(s, 'renamed_b')` must become a
+    // file filter on physical child `b`, even if the old file type is `STRUCT<b ...>`.
+    const auto mapping_result =
+            resolve_nested_projection_with_mapping(path, mappings, &resolved->file_projection);
+    if (mapping_result == NestedProjectionResolveResult::MISSING_FILE_CHILD) {
+        return false;
+    }
+    if (mapping_result == NestedProjectionResolveResult::NOT_REPRESENTED) {
+        if (!table_root_is_struct(*mapping_it)) {
+            return false;
+        }
+        LocalColumnIndex child_projection;
+        if (!build_file_child_projection_from_schema(mapping_it->original_file_children,
+                                                     path.selectors, &child_projection)
+                     .ok() ||
+            child_projection.local_id() < 0) {
+            return false;
+        }
+        resolved->file_projection = LocalColumnIndex::partial_local(*mapping_it->file_local_id);
+        resolved->file_projection.children.push_back(std::move(child_projection));
+    }
+
+    if (resolved->file_projection.children.size() != 1) {
+        *resolved = {};
+        return false;
+    }
+    // When rewriting the final localized element_at chain, it executes on the file column produced
+    // by this scan, so the intermediate return types must match the projected file shape, not the
+    // full historical file schema. Example:
+    //   SELECT s.c WHERE element_at(element_at(s, 'b'), 'cc') LIKE 'NestedC%'
+    // reads only b.cc and c; the inner element_at(s, 'b') returns Struct(cc), not
+    // Struct(cc, new_dd).
+    //
+    // Earlier projection collection also calls this resolver before filter-only children have been
+    // merged into the scan projection. That phase only needs the file path, so it still resolves
+    // names/types from the original file schema.
+    const auto& child_source = require_scan_projection ? scan_file_children(*mapping_it)
+                                                       : mapping_it->original_file_children;
+    if (!collect_file_child_names_from_projection(
+                child_source, resolved->file_projection.children[0], &resolved->file_child_names,
+                &resolved->file_child_types) ||
+        resolved->file_child_names.size() != path.selectors.size() ||
+        resolved->file_child_types.size() != path.selectors.size()) {
+        *resolved = {};
+        return false;
+    }
+    return true;
+}
+
+bool resolve_nested_struct_expr_for_file(const VExprSPtr& expr,
+                                         const std::vector<ColumnMapping>& mappings,
+                                         ResolvedNestedStructPath* resolved) {
+    DORIS_CHECK(resolved != nullptr);
+    NestedStructPath path;
+    if (!extract_nested_struct_path(expr, &path)) {
+        *resolved = {};
+        return false;
+    }
+    return resolve_nested_struct_path_for_file(path, mappings, resolved, true);
+}
+
+// Collect nested struct leaf references that can be turned into file-reader projections and
+// primitive pruning predicates. For example, from `s.a > 1 AND element_at(s, 'b') = 2`, this
+// records two paths rooted at `s`: `s -> a` and `s -> b`. Non-struct expressions are traversed
+// recursively, while a recognized struct path is emitted once so the caller can merge it into the
+// scan projection for that top-level file column.
+void collect_nested_struct_paths(const VExprSPtr& expr, std::vector<NestedStructPath>* paths) {
+    DORIS_CHECK(paths != nullptr);
+    if (expr == nullptr) {
+        return;
+    }
+    NestedStructPath path;
+    if (extract_nested_struct_path_for_pruning(expr, &path)) {
+        paths->push_back(std::move(path));
+        return;
+    }
+    for (const auto& child : expr->children()) {
+        collect_nested_struct_paths(child, paths);
+    }
+}
+
+std::vector<const ColumnMapping*> present_child_mappings_in_file_order(
+        const std::vector<ColumnMapping>& child_mappings) {
+    std::vector<const ColumnMapping*> result;
+    result.reserve(child_mappings.size()); // upper bound: some children may be skipped below
+    for (const auto& child_mapping : child_mappings) {
+        if (child_mapping.file_local_id.has_value()) { // keep only children that have a file id
+            result.push_back(&child_mapping);
+        }
+    }
+    std::ranges::sort(result, [](const ColumnMapping* lhs, const ColumnMapping* rhs) {
+        DORIS_CHECK(lhs->file_local_id.has_value());
+        DORIS_CHECK(rhs->file_local_id.has_value());
+        return *lhs->file_local_id < *rhs->file_local_id; // ascending file-local id
+    });
+    return result; // pointers refer into `child_mappings`
+}
+
+// Build the nested child projection under a top-level file column by walking file schema children
+// directly. The result excludes the root column id; callers attach it under
+// `LocalColumnIndex::partial_local(root_id)`. An unresolvable path is not an error: OK is returned
+// and `*projection` is reset to a default value, which callers detect via `local_id() < 0`.
+Status build_file_child_projection_from_schema(const std::vector<ColumnDefinition>& children,
+                                               std::span<const StructChildSelector> selectors,
+                                               LocalColumnIndex* projection) {
+    DORIS_CHECK(projection != nullptr);
+    *projection = LocalColumnIndex {}; // never report a stale caller value as a resolved path
+    if (selectors.empty()) {
+        return Status::InvalidArgument("Nested struct selector path is empty");
+    }
+    const auto* child = resolve_file_child(children, selectors.front());
+    if (child == nullptr) {
+        return Status::OK(); // the first selector matches no file child
+    }
+    LocalColumnIndex resolved = LocalColumnIndex::local(child->file_local_id());
+    resolved.project_all_children = selectors.size() == 1;
+    resolved.children.clear();
+    if (selectors.size() > 1) {
+        if (child->children.empty() ||
+            remove_nullable(child->type)->get_primitive_type() != TYPE_STRUCT) {
+            return Status::OK(); // cannot descend into a childless or non-struct field
+        }
+        LocalColumnIndex child_projection;
+        RETURN_IF_ERROR(build_file_child_projection_from_schema(
+                child->children, selectors.subspan(1), &child_projection));
+        if (child_projection.local_id() < 0) {
+            return Status::OK(); // a deeper selector did not resolve
+        }
+        resolved.children.push_back(std::move(child_projection));
+    }
+    *projection = std::move(resolved); // published only once the whole path resolved
+    return Status::OK();
+}
+
+// Merge predicates that target the same physical file column or nested leaf. For example,
+// `WHERE s.b > 1 AND s.b < 10` produces two predicates for the same target `s -> b`; keeping them
+// in one FileColumnPredicateFilter lets the file reader apply both pruning checks to the same leaf
+// instead of carrying duplicate target entries.
+void merge_column_predicate_filter(FileColumnPredicateFilter column_filter,
+                                   std::vector<FileColumnPredicateFilter>* filters) {
+    DORIS_CHECK(filters != nullptr);
+    auto existing_filter_it = std::ranges::find_if(*filters, [&](const auto& existing_filter) {
+        return existing_filter.same_target_as(column_filter);
+    });
+    if (existing_filter_it == filters->end()) {
+        filters->push_back(std::move(column_filter)); // first filter for this target
+        return;
+    }
+    for (auto& predicate : column_filter.predicates) { // by-value sink: move, do not copy
+        existing_filter_it->predicates.push_back(std::move(predicate));
+    }
+}
+
+// Extract file-column pruning predicates from localized row-level conjuncts that reference nested
+// struct leaves. This is separate from file_request->conjuncts: conjuncts do row filtering, while
+// FileColumnPredicateFilter carries primitive leaf predicates for file/page/statistics pruning.
+//
+// Example: for `WHERE s.b.c > 10 AND element_at(s, 'd') IS NOT NULL`, this function emits pruning
+// filters for the nested targets `s -> b -> c` and `s -> d`. The caller only invokes it after
+// table_filter_has_only_local_entries() succeeds, so each root slot already has a file-local scan
+// source in _filter_entries.
+void collect_nested_column_predicate_filters(const VExprSPtr& expr,
+                                             const std::vector<ColumnMapping>& mappings,
+                                             std::vector<FileColumnPredicateFilter>* filters) {
+    DORIS_CHECK(filters != nullptr);
+    if (expr == nullptr) {
+        return;
+    }
+    if (expr->node_type() == TExprNodeType::COMPOUND_PRED &&
+        expr->op() == TExprOpcode::COMPOUND_AND) { // only AND is split into its conjuncts
+        for (const auto& child : expr->children()) {
+            collect_nested_column_predicate_filters(child, mappings, filters);
+        }
+        return;
+    }
+    FileColumnPredicateFilter column_filter; // merged only if one extractor fills it
+    if (extract_nested_binary_comparison_filter(expr, mappings, &column_filter) ||
+        extract_nested_in_list_filter(expr, mappings, &column_filter) ||
+        extract_nested_null_filter(expr, mappings, &column_filter)) {
+        merge_column_predicate_filter(std::move(column_filter), filters);
+    }
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/column_mapper_nested.h b/be/src/format_v2/column_mapper_nested.h
new file mode 100644
index 00000000000000..b8b3f1f3334a8f
--- /dev/null
+++ b/be/src/format_v2/column_mapper_nested.h
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <span>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/field.h"
+#include "exprs/vexpr_fwd.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/file_reader.h"
+
+namespace doris::format {
+
+struct StructChildSelector {
+    bool by_name = true;
+    std::string name;
+    size_t ordinal = 0;
+};
+
+struct NestedStructPath {
+    GlobalIndex root_global_index;
+    std::vector<StructChildSelector> selectors;
+};
+
+struct ResolvedNestedStructPath {
+    LocalColumnIndex file_projection;
+    std::vector<std::string> file_child_names;
+    std::vector<DataTypePtr> file_child_types;
+};
+
+// A split-local literal produced by slot-literal predicate localization. This wrapper keeps the
+// original table literal so a cloned conjunct can be localized again for another split.
+class SplitLocalFileLiteral final : public VLiteral {
+public:
+    SplitLocalFileLiteral(const DataTypePtr& file_type, const Field& file_field,
+                          DataTypePtr original_type, Field original_field);
+
+    const DataTypePtr& original_type() const { return _original_type; }
+    const Field& original_field() const { return _original_field; }
+    Status clone_node(VExprSPtr* cloned_expr) const override { // clone keeps both literal forms
+        DORIS_CHECK(cloned_expr != nullptr);
+        Field file_field;
+        get_column_ptr()->get(0, file_field); // current file-side value, row 0
+        *cloned_expr = std::make_shared<SplitLocalFileLiteral>(_data_type, file_field,
+                                                               _original_type, _original_field);
+        return Status::OK();
+    }
+
+private:
+    DataTypePtr _original_type; // original literal type passed to the constructor
+    Field _original_field; // original literal value passed to the constructor
+};
+
+GlobalIndex slot_ref_global_index(const VSlotRef& slot_ref);
+bool is_struct_element_expr(const VExprSPtr& expr);
+Field literal_field(const VExprSPtr& literal_expr);
+
+bool resolve_nested_struct_path_for_file(const NestedStructPath& path,
+                                         const std::vector<ColumnMapping>& mappings,
+                                         ResolvedNestedStructPath* resolved,
+                                         bool require_scan_projection = false);
+
+bool resolve_nested_struct_expr_for_file(const VExprSPtr& expr,
+                                         const std::vector<ColumnMapping>& mappings,
+                                         ResolvedNestedStructPath* resolved);
+
+void collect_nested_struct_paths(const VExprSPtr& expr, std::vector<NestedStructPath>* paths);
+
+std::vector<const ColumnMapping*> present_child_mappings_in_file_order(
+        const std::vector<ColumnMapping>& child_mappings);
+
+Status build_file_child_projection_from_schema(const std::vector<ColumnDefinition>& children,
+                                               std::span<const StructChildSelector> selectors,
+                                               LocalColumnIndex* projection);
+
+void merge_column_predicate_filter(FileColumnPredicateFilter column_filter,
+                                   std::vector<FileColumnPredicateFilter>* filters);
+
+void collect_nested_column_predicate_filters(const VExprSPtr& expr,
+                                             const std::vector<ColumnMapping>& mappings,
+                                             std::vector<FileColumnPredicateFilter>* filters);
+
+} // namespace doris::format
diff --git a/be/src/format_v2/delimited_text/csv_reader.cpp b/be/src/format_v2/delimited_text/csv_reader.cpp
new file mode 100644
index 00000000000000..711146a9880479
--- /dev/null
+++ b/be/src/format_v2/delimited_text/csv_reader.cpp
@@ -0,0 +1,295 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/csv_reader.h"
+
+#include <cstring>
+#include <utility>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type_serde/data_type_string_serde.h"
+#include "format/file_reader/new_plain_binary_line_reader.h"
+#include "format/file_reader/new_plain_text_line_reader.h"
+#include "gen_cpp/internal_service.pb.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/decompressor.h"
+#include "util/utf8_check.h"
+
+namespace doris::format::csv {
+namespace {
+
+bool starts_with_at(const Slice& line, size_t pos, const std::string& needle) {
+    const size_t n = needle.size(); // An empty needle never matches.
+    return n != 0 && pos + n <= line.size && std::memcmp(line.data + pos, needle.data(), n) == 0;
+}
+
+bool is_csv_text_format(TFileFormatType::type format_type) {
+    switch (format_type) {
+    case TFileFormatType::FORMAT_CSV_PLAIN:
+    case TFileFormatType::FORMAT_CSV_GZ:
+    case TFileFormatType::FORMAT_CSV_BZ2:
+    case TFileFormatType::FORMAT_CSV_LZ4FRAME:
+    case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
+    case TFileFormatType::FORMAT_CSV_LZOP:
+    case TFileFormatType::FORMAT_CSV_SNAPPYBLOCK:
+    case TFileFormatType::FORMAT_CSV_DEFLATE:
+        return true; // Every FORMAT_CSV_* variant listed above is read as text lines.
+    default: // Anything else, e.g. FORMAT_PROTO, is not CSV text.
+        return false;
+    }
+}
+
+} // namespace
+
+CsvReader::CsvReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                     std::unique_ptr<io::FileDescription>& file_description,
+                     std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                     const TFileScanRangeParams* scan_params,
+                     const std::vector<SlotDescriptor*>& file_slot_descs,
+                     TFileCompressType::type range_compress_type,
+                     std::optional<TUniqueId> stream_load_id)
+        : DelimitedTextReader(system_properties, file_description, std::move(io_ctx), profile,
+                              scan_params, file_slot_descs, range_compress_type,
+                              std::move(stream_load_id), "CSV") {} // "CSV" is the reader_name.
+
+CsvReader::~CsvReader() = default;
+
+Status CsvReader::_init_format_state() {
+    _file_format_type = _scan_params->format_type;
+    // Compression precedence: the per-range type, then the scan-level type if FE set it.
+    _file_compress_type = _range_compress_type;
+    if (_file_compress_type == TFileCompressType::UNKNOWN && _scan_params->__isset.compress_type) {
+        _file_compress_type = _scan_params->compress_type;
+    }
+    if (_file_compress_type == TFileCompressType::UNKNOWN &&
+        _file_format_type == TFileFormatType::FORMAT_CSV_PLAIN) {
+        // Plain CSV is an uncompressed byte stream even when FE leaves compress_type unset.
+        // Normalizing to PLAIN lets non-first splits pass the split-compressed-file guard in
+        // the shared reader base, which would otherwise reject UNKNOWN.
+        _file_compress_type = TFileCompressType::PLAIN;
+    }
+
+    const auto& text = _scan_params->file_attributes.text_params;
+    _value_separator = text.column_separator;
+    _line_delimiter = text.line_delimiter;
+    if (text.__isset.enclose) {
+        _enclose = text.enclose;
+    }
+    if (text.__isset.escape) {
+        _escape = text.escape;
+    }
+    if (text.__isset.empty_field_as_null) {
+        _empty_field_as_null = text.empty_field_as_null;
+    }
+    if (_scan_params->file_attributes.__isset.trim_double_quotes) {
+        _trim_double_quotes = _scan_params->file_attributes.trim_double_quotes;
+    }
+    if (_runtime_state != nullptr) {
+        _keep_cr = _runtime_state->query_options().keep_carriage_return;
+    }
+    _trim_tailing_spaces = _runtime_state != nullptr &&
+                           _runtime_state->trim_tailing_spaces_for_external_table_query();
+    // Serde options mirror the members resolved above; delimiters default to ',' and ':'.
+    _options.quote_char = _enclose;
+    _options.escape_char = _escape;
+    _options.converted_from_string = _trim_double_quotes;
+    _options.collection_delim =
+            text.collection_delimiter.empty() ? ',' : text.collection_delimiter[0];
+    _options.map_key_delim = text.mapkv_delimiter.empty() ? ':' : text.mapkv_delimiter[0];
+    if (text.__isset.null_format) {
+        _options.null_format = text.null_format.data();
+        _options.null_len = text.null_format.length();
+    }
+    return Status::OK();
+}
+
+Status CsvReader::_create_decompressor() {
+    // Prefer the resolved compress type; fall back to the format type only when it is UNKNOWN.
+    const bool known = _file_compress_type != TFileCompressType::UNKNOWN;
+    return known ? Decompressor::create_decompressor(_file_compress_type, &_decompressor)
+                 : Decompressor::create_decompressor(_file_format_type, &_decompressor);
+}
+
+Status CsvReader::_create_line_reader() {
+    if (_file_format_type == TFileFormatType::FORMAT_PROTO) {
+        _line_reader = NewPlainBinaryLineReader::create_unique(_file_reader);
+        return Status::OK();
+    }
+    if (!is_csv_text_format(_file_format_type)) {
+        return Status::InternalError<false>("Unknown CSV format type {}", _file_format_type);
+    }
+
+    std::shared_ptr<TextLineReaderContextIf> line_ctx;
+    if (_enclose != 0) {
+        // With an enclose character a logical record may span physical newlines. Fields are
+        // still sliced in `_split_line()` because the v2 scan request may ask for CSV
+        // ordinals in a different order from the physical file.
+        const size_t slot_count = _source_file_slot_descs.size();
+        const size_t separator_count = slot_count > 1 ? slot_count - 1 : 0;
+        line_ctx = std::make_shared<EncloseCsvLineReaderCtx>(
+                _line_delimiter, _line_delimiter.size(), _value_separator,
+                _value_separator.size(), separator_count, _enclose, _escape, _keep_cr);
+    } else {
+        line_ctx = std::make_shared<PlainTextLineReaderCtx>(_line_delimiter,
+                                                            _line_delimiter.size(), _keep_cr);
+    }
+    _line_reader = NewPlainTextLineReader::create_unique(
+            _profile, _file_reader, _decompressor.get(), std::move(line_ctx), _size, _start_offset);
+    return Status::OK();
+}
+
+Status CsvReader::_validate_line(const Slice& line) {
+    if (_file_format_type == TFileFormatType::FORMAT_PROTO || !_enable_text_validate_utf8 ||
+        validate_utf8(line.data, line.size)) {
+        return Status::OK(); // Not subject to validation, or valid UTF-8.
+    }
+    return Status::InternalError<false>("Only support csv data in utf8 codec");
+}
+
+// Splits one logical record into `_split_values`, one entry per field. Text fields are emitted
+// as (pointer, length) views into `line`; no field bytes are copied here.
+void CsvReader::_split_line(const Slice& line) {
+    _split_values.clear();
+    if (_file_format_type == TFileFormatType::FORMAT_PROTO) {
+        // FORMAT_PROTO rows are not text: `line.data` carries a PDataRow* whose columns are
+        // already separated, so every column value is emitted as-is.
+        auto** row_ptr = reinterpret_cast<PDataRow**>(line.data);
+        PDataRow* row = *row_ptr;
+        for (const PDataColumn& col : row->col()) {
+            _split_values.emplace_back(col.value());
+        }
+        return;
+    }
+
+    // Emits the field occupying [begin, end) of `line`. In-loop fields and the trailing field
+    // both go through this helper so their handling cannot drift apart: trailing spaces are
+    // dropped first (when enabled), then at most one surrounding quote pair is removed --
+    // '"' when trim_double_quotes is set and matches, otherwise the enclose character.
+    const auto emit_field = [this, &line](size_t begin, size_t end) {
+        size_t len = end - begin;
+        while (_trim_tailing_spaces && len > 0 && line.data[begin + len - 1] == ' ') {
+            --len;
+        }
+        const auto wrapped_in = [&](char quote) {
+            return len > 1 && line.data[begin] == quote && line.data[begin + len - 1] == quote;
+        };
+        if ((_trim_double_quotes && wrapped_in('"')) || (_enclose != 0 && wrapped_in(_enclose))) {
+            ++begin;
+            len -= 2;
+        }
+        _split_values.emplace_back(line.data + begin, len);
+    };
+
+    // The text line reader is responsible for split boundaries and multi-line quoted fields.
+    // Field slicing still happens here because FileScannerV2 asks columns by file-local id, so we
+    // must be able to materialize only the requested CSV ordinals without building a row object.
+    // Example: for `1,"a,b",10` and column separator `,`, this loop returns three slices:
+    // `1`, `a,b`, and `10`; the comma inside quotes does not create an extra field.
+    bool in_quote = false;
+    bool escaped = false;
+    size_t field_begin = 0;
+    size_t i = 0;
+    while (i < line.size) {
+        const char ch = line.data[i];
+        if (_enclose != 0) {
+            // Escape and quote tracking only applies when an enclose character is configured.
+            if (escaped) {
+                // The previous character was the escape character: take this one literally.
+                escaped = false;
+                ++i;
+                continue;
+            }
+            if (_escape != 0 && ch == _escape) {
+                escaped = true;
+                ++i;
+                continue;
+            }
+            if (ch == _enclose) {
+                if (in_quote && i + 1 < line.size && line.data[i + 1] == _enclose) {
+                    // A doubled enclose character inside quotes does not close the quote.
+                    i += 2;
+                    continue;
+                }
+                in_quote = !in_quote;
+                ++i;
+                continue;
+            }
+        }
+        if (!in_quote && starts_with_at(line, i, _value_separator)) {
+            emit_field(field_begin, i);
+            // The separator may span several bytes; skip all of it before the next field.
+            i += _value_separator.size();
+            field_begin = i;
+            continue;
+        }
+        ++i;
+    }
+
+    // Whatever follows the last separator (possibly nothing) is the final field, so a text
+    // line always yields at least one value.
+    emit_field(field_begin, line.size);
+}
+
+Status CsvReader::_deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                        Slice value) {
+    DORIS_CHECK(output != nullptr);
+    if (!column.nullable_string_fast_path) {
+        return column.serde->deserialize_one_cell_from_csv(*output, value, _options);
+    }
+
+    // Fast path for nullable strings, the hottest CSV type: bypass the generic nullable serde
+    // wrapper, write into the nested string column and maintain the null map by hand.
+    auto& nullable_output = assert_cast<ColumnNullable&>(*output);
+    // Empty-field handling is kept apart from null_format matching. An empty null_format must
+    // not turn every empty field into NULL unless FE explicitly sets empty_field_as_null;
+    // OpenCSV-compatible tables expect empty fields to stay empty strings.
+    const bool empty_as_null = _empty_field_as_null && value.size == 0;
+    const bool is_null_literal = _options.null_len > 0 && value.size == _options.null_len &&
+                                 std::memcmp(value.data, _options.null_format, value.size) == 0;
+    if (empty_as_null || is_null_literal) {
+        nullable_output.insert_data(nullptr, 0);
+        return Status::OK();
+    }
+    static DataTypeStringSerDe string_serde(TYPE_STRING);
+    const auto status = string_serde.deserialize_one_cell_from_csv(
+            nullable_output.get_nested_column(), value, _options);
+    if (status.ok()) {
+        nullable_output.get_null_map_data().push_back(0);
+    } else {
+        // A value the string serde rejects is stored as NULL rather than failing the row.
+        nullable_output.insert_data(nullptr, 0);
+    }
+    return Status::OK();
+}
+
+Slice CsvReader::_normalize_value(Slice value) const {
+    // With empty_field_as_null set, an empty field is replaced by the configured null
+    // marker; every other value passes through untouched.
+    const bool as_null = _empty_field_as_null && value.size == 0;
+    return as_null ? Slice(_options.null_format, _options.null_len) : value;
+}
+
+bool CsvReader::_can_split() const { // True only for PLAIN input, declared or implied.
+    const bool plain_by_format = _file_compress_type == TFileCompressType::UNKNOWN &&
+                                 _file_format_type == TFileFormatType::FORMAT_CSV_PLAIN;
+    return _file_compress_type == TFileCompressType::PLAIN || plain_by_format;
+}
+
+} // namespace doris::format::csv
diff --git a/be/src/format_v2/delimited_text/csv_reader.h b/be/src/format_v2/delimited_text/csv_reader.h
new file mode 100644
index 00000000000000..e5d1ce25a74f40
--- /dev/null
+++ b/be/src/format_v2/delimited_text/csv_reader.h
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+
+#include "format_v2/delimited_text/delimited_text_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "util/slice.h"
+
+namespace doris {
+class SlotDescriptor;
+} // namespace doris
+
+namespace doris::format::csv {
+
+// FileScannerV2 CSV reader.
+//
+// CSV files do not carry a physical schema. FE provides the table slot descriptors plus
+// TFileScanRangeParams::column_idxs, where each file slot maps to a CSV field ordinal. This reader
+// exposes that information as a v2 file-local schema and implements CSV parsing directly in the v2
+// FileReader contract.
+class CsvReader final : public ::doris::format::DelimitedTextReader {
+public:
+    // `file_slot_descs` must contain only columns physically readable from the CSV payload.
+    // Partition/default/virtual columns are materialized by TableReader after this reader returns
+    // a file-local block. Keeping that boundary is important because CSV has no embedded schema
+    // from which those non-file columns could be derived.
+    CsvReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+              std::unique_ptr<io::FileDescription>& file_description,
+              std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+              const TFileScanRangeParams* scan_params,
+              const std::vector<SlotDescriptor*>& file_slot_descs,
+              TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN,
+              std::optional<TUniqueId> stream_load_id = std::nullopt);
+    ~CsvReader() override;
+
+private:
+    Status _init_format_state() override;
+    Status _create_decompressor() override;
+    Status _create_line_reader() override;
+    Status _validate_line(const Slice& line) override;
+    void _split_line(const Slice& line) override;
+    Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                 Slice value) override;
+    Slice _normalize_value(Slice value) const override;
+    bool _can_split() const override;
+
+    TFileFormatType::type _file_format_type = TFileFormatType::FORMAT_CSV_PLAIN;
+    char _enclose = 0; // 0 means no enclose character is configured.
+    bool _trim_double_quotes = false; // Set from file_attributes.trim_double_quotes.
+    bool _trim_tailing_spaces = false; // Set from RuntimeState in _init_format_state().
+    bool _empty_field_as_null = false; // Set from text_params.empty_field_as_null.
+    bool _keep_cr = false; // Set from the keep_carriage_return query option.
+};
+
+} // namespace doris::format::csv
diff --git a/be/src/format_v2/delimited_text/delimited_text_reader.cpp b/be/src/format_v2/delimited_text/delimited_text_reader.cpp
new file mode 100644
index 00000000000000..f6e84b4aa7750e
--- /dev/null
+++ b/be/src/format_v2/delimited_text/delimited_text_reader.cpp
@@ -0,0 +1,644 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/delimited_text_reader.h"
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "format/line_reader.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/materialized_reader_util.h"
+#include "io/file_factory.h"
+#include "io/fs/tracing_file_reader.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/decompressor.h"
+#include "util/string_util.h"
+
+namespace doris::format {
+namespace {
+
+constexpr const char* DELIMITED_TEXT_PROFILE = "DelimitedTextReader";
+
+void update_counter(RuntimeProfile::Counter* counter, int64_t value) {
+    if (counter != nullptr) { // Counters that were never registered are skipped.
+        COUNTER_UPDATE(counter, value);
+    }
+}
+
+DataTypePtr nullable_type(DataTypePtr type) {
+    const bool already_nullable = type != nullptr && type->is_nullable(); // Wrap at most once.
+    return already_nullable ? std::move(type) : make_nullable(std::move(type));
+}
+
+// Maps an FE slot type to the type this reader exposes on the file side. Only bounded strings
+// change (CHAR/VARCHAR become STRING); ARRAY/MAP/STRUCT are rebuilt so the rule also reaches
+// their nested types. Nullability of the input is kept at every level.
+DataTypePtr delimited_file_type_from_slot_type(const DataTypePtr& type) {
+    if (type == nullptr) {
+        return nullptr;
+    }
+
+    // The rewrite works on the type underneath any Nullable wrapper; this helper puts the
+    // wrapper back when the input had one.
+    const auto restore_nullability = [&type](const DataTypePtr& rewritten) -> DataTypePtr {
+        return type->is_nullable() ? make_nullable(rewritten) : rewritten;
+    };
+    const auto bare_type = remove_nullable(type);
+    switch (bare_type->get_primitive_type()) {
+    case TYPE_CHAR:
+    case TYPE_VARCHAR:
+        // Delimited text carries no CHAR/VARCHAR length metadata, and the FE slot type describes
+        // the table target rather than a bounded physical file type. Bounded strings are thus
+        // exposed as unbounded STRING so TableReader can enforce the table length later.
+        // Example: the TEXT field "hangzhou" mapped to table CHAR(3) is read as STRING and
+        // truncated to "han" during table materialization.
+        return restore_nullability(std::make_shared<DataTypeString>());
+    case TYPE_ARRAY: {
+        const auto* arr = assert_cast<const DataTypeArray*>(bare_type.get());
+        return restore_nullability(std::make_shared<DataTypeArray>(
+                delimited_file_type_from_slot_type(arr->get_nested_type())));
+    }
+    case TYPE_MAP: {
+        const auto* kv = assert_cast<const DataTypeMap*>(bare_type.get());
+        return restore_nullability(std::make_shared<DataTypeMap>(
+                delimited_file_type_from_slot_type(kv->get_key_type()),
+                delimited_file_type_from_slot_type(kv->get_value_type())));
+    }
+    case TYPE_STRUCT: {
+        const auto* record = assert_cast<const DataTypeStruct*>(bare_type.get());
+        DataTypes rewritten_fields;
+        rewritten_fields.reserve(record->get_elements().size());
+        for (const auto& field_type : record->get_elements()) {
+            rewritten_fields.push_back(delimited_file_type_from_slot_type(field_type));
+        }
+        return restore_nullability(
+                std::make_shared<DataTypeStruct>(rewritten_fields, record->get_element_names()));
+    }
+    default:
+        // All remaining types are exposed unchanged.
+        return restore_nullability(bare_type);
+    }
+}
+
+ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id);
+
+// Builds the synthetic file-schema children of a complex type: "element" for ARRAY, "key" and
+// "value" for MAP, and one child per STRUCT field. The forward-declared
+// synthetic_file_child() calls back into this function for nested complex types.
+std::vector<ColumnDefinition> synthesize_file_children_from_type(const DataTypePtr& type) {
+    std::vector<ColumnDefinition> result;
+    if (type == nullptr) {
+        return result;
+    }
+
+    // Children mirror the value type, so a Nullable wrapper is peeled off first. Local ids are
+    // positions within the parent: 0 for an ARRAY element, 0/1 for MAP key/value, and the field
+    // index for STRUCT members. Any other type produces no children.
+    const auto value_type = remove_nullable(type);
+    const auto primitive = value_type->get_primitive_type();
+    if (primitive == TYPE_ARRAY) {
+        const auto* arr = assert_cast<const DataTypeArray*>(value_type.get());
+        result.push_back(synthetic_file_child("element", arr->get_nested_type(), 0));
+    } else if (primitive == TYPE_MAP) {
+        const auto* kv = assert_cast<const DataTypeMap*>(value_type.get());
+        result.push_back(synthetic_file_child("key", kv->get_key_type(), 0));
+        result.push_back(synthetic_file_child("value", kv->get_value_type(), 1));
+    } else if (primitive == TYPE_STRUCT) {
+        const auto* record = assert_cast<const DataTypeStruct*>(value_type.get());
+        const size_t field_count = record->get_elements().size();
+        result.reserve(field_count);
+        for (size_t pos = 0; pos < field_count; ++pos) {
+            result.push_back(synthetic_file_child(record->get_element_name(pos),
+                                                  record->get_element(pos),
+                                                  cast_set<int32_t>(pos)));
+        }
+    }
+    return result;
+}
+
+ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id) {
+    ColumnDefinition def;
+    def.name = name;
+    def.local_id = local_id;
+    def.identifier = Field::create_field<TYPE_STRING>(name);
+    def.type = std::move(type);
+    def.children = synthesize_file_children_from_type(def.type); // Recurse for nested types.
+    return def;
+}
+
+} // namespace
+
+DelimitedTextReader::DelimitedTextReader(
+        std::shared_ptr<io::FileSystemProperties>& system_properties,
+        std::unique_ptr<io::FileDescription>& file_description,
+        std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+        const TFileScanRangeParams* scan_params,
+        const std::vector<SlotDescriptor*>& file_slot_descs,
+        TFileCompressType::type range_compress_type, std::optional<TUniqueId> stream_load_id,
+        std::string reader_name)
+        : FileReader(system_properties, file_description, std::move(io_ctx), profile),
+          _scan_params(scan_params),
+          _source_file_slot_descs(file_slot_descs),
+          _range_compress_type(range_compress_type),
+          _stream_load_id(std::move(stream_load_id)),
+          _reader_name(std::move(reader_name)) {} // Name is used in error messages.
+
+DelimitedTextReader::~DelimitedTextReader() {
+    static_cast<void>(close()); // The close() result is deliberately discarded.
+}
+
+void DelimitedTextReader::_init_profile() {
+    if (_profile == nullptr || _text_profile.raw_lines_read != nullptr) {
+        return; // No profile attached, or the counters are already registered.
+    }
+
+    ADD_TIMER_WITH_LEVEL(_profile, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.open_file_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "OpenFileTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.create_line_reader_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "CreateLineReaderTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.read_line_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ReadLineTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.split_line_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "SplitLineTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.deserialize_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "DeserializeTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.conjunct_filter_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(_profile, "ConjunctFilterTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.delete_conjunct_filter_time = ADD_CHILD_TIMER_WITH_LEVEL(
+            _profile, "DeleteConjunctFilterTime", DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.raw_lines_read = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RawLinesRead", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.rows_read_before_filter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RowsReadBeforeFilter", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.rows_filtered_by_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RowsFilteredByConjunct", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.rows_filtered_by_delete_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RowsFilteredByDeleteConjunct", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.rows_returned = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "RowsReturned", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.empty_lines_read = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "EmptyLinesRead", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.skipped_lines = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "SkippedLines", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+    _text_profile.cells_deserialized = ADD_CHILD_COUNTER_WITH_LEVEL(
+            _profile, "CellsDeserialized", TUnit::UNIT, DELIMITED_TEXT_PROFILE, 1);
+}
+
+Status DelimitedTextReader::init(RuntimeState* state) {
+    _init_profile();
+    _runtime_state = state;
+    if (_scan_params == nullptr) {
+        return Status::InvalidArgument("{} v2 reader requires scan params", _reader_name);
+    }
+    if (_file_description == nullptr) {
+        return Status::InvalidArgument("{} v2 reader requires file description", _reader_name);
+    }
+    const bool has_text_params = _scan_params->__isset.file_attributes &&
+                                 _scan_params->file_attributes.__isset.text_params;
+    if (!has_text_params) {
+        return Status::InvalidArgument("{} v2 reader requires text file attributes", _reader_name);
+    }
+    // UTF-8 validation is on unless FE explicitly turns it off.
+    const auto& attributes = _scan_params->file_attributes;
+    _enable_text_validate_utf8 =
+            !attributes.__isset.enable_text_validate_utf8 || attributes.enable_text_validate_utf8;
+
+    RETURN_IF_ERROR(_init_format_state());
+
+    // Delimited text has no physical column ids. FE's `column_idxs` says which text-field
+    // ordinal each physical file slot reads, and that ordinal, not the slot's position in
+    // the vector, becomes the local id. Without `column_idxs` the two coincide.
+    const size_t slot_count = _source_file_slot_descs.size();
+    const bool has_column_idxs =
+            _scan_params->__isset.column_idxs && !_scan_params->column_idxs.empty();
+    _source_column_idxs.clear();
+    if (has_column_idxs && _scan_params->column_idxs.size() != slot_count) {
+        return Status::InvalidArgument(
+                "{} v2 reader column_idxs size {} does not match file slot size {}",
+                _reader_name, _scan_params->column_idxs.size(), slot_count);
+    }
+    _source_column_idxs.reserve(slot_count);
+    for (size_t pos = 0; pos < slot_count; ++pos) {
+        _source_column_idxs.push_back(has_column_idxs ? _scan_params->column_idxs[pos]
+                                                      : static_cast<int32_t>(pos));
+    }
+
+    _source_serdes = create_data_type_serdes(_source_file_slot_descs);
+    _file_schema.clear();
+    _file_schema.reserve(slot_count);
+    for (size_t pos = 0; pos < slot_count; ++pos) {
+        const auto* slot = _source_file_slot_descs[pos];
+        DORIS_CHECK(slot != nullptr);
+        // A complex value occupies one top-level text field, yet TableColumnMapper still needs
+        // semantic children to localize nested projections and predicates. ARRAY element, MAP
+        // key/value and STRUCT fields are therefore exposed as children, while the top-level
+        // local id stays the physical text-field ordinal taken from column_idxs.
+        ColumnDefinition def;
+        def.name = slot->col_name();
+        def.identifier = Field::create_field<TYPE_STRING>(slot->col_name());
+        def.local_id = _source_column_idxs[pos];
+        def.type = nullable_type(delimited_file_type_from_slot_type(slot->get_data_type_ptr()));
+        def.children = synthesize_file_children_from_type(def.type);
+        _file_schema.push_back(std::move(def));
+    }
+    _eof = false;
+    return Status::OK();
+}
+
+Status DelimitedTextReader::get_schema(std::vector<ColumnDefinition>* file_schema) const {
+    if (file_schema != nullptr) {
+        *file_schema = _file_schema; // Hands the caller a copy of the schema built by init().
+        return Status::OK();
+    }
+    return Status::InvalidArgument("{} v2 file_schema is null", _reader_name);
+}
+
+std::unique_ptr<TableColumnMapper> DelimitedTextReader::create_column_mapper(
+        TableColumnMapperOptions options) const { // `options` is moved into the mapper.
+    return std::make_unique<MaterializedColumnMapper>(std::move(options));
+}
+
+Status DelimitedTextReader::open(std::shared_ptr<FileScanRequest> request) {
+    RETURN_IF_ERROR(FileReader::open(std::move(request)));
+    DORIS_CHECK(_request != nullptr);
+    RETURN_IF_ERROR(_build_requested_columns(*_request, &_requested_columns));
+    {
+        SCOPED_TIMER(_text_profile.open_file_time);
+        RETURN_IF_ERROR(_open_file());
+    }
+    RETURN_IF_ERROR(_create_decompressor()); // CSV's line reader takes the decompressor.
+    {
+        SCOPED_TIMER(_text_profile.create_line_reader_time);
+        RETURN_IF_ERROR(_create_line_reader());
+    }
+    _line_reader_eof = false; // Reset the per-open read state below.
+    _bom_removed = false;
+    _eof = false;
+    return Status::OK();
+}
+
+Status DelimitedTextReader::get_block(Block* file_block, size_t* rows, bool* eof) {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    if (_line_reader == nullptr) {
+        return Status::InternalError("{} v2 reader is not open", _reader_name);
+    }
+
+    const auto batch_size = _runtime_state != nullptr ? _runtime_state->batch_size() : 4096;
+    const auto max_block_bytes = _runtime_state != nullptr
+                                         ? _runtime_state->preferred_block_size_bytes()
+                                         : std::numeric_limits<size_t>::max();
+    *rows = 0;
+    *eof = false;
+
+    {
+        auto columns_guard = file_block->mutate_columns_scoped();
+        auto& columns = columns_guard.mutable_columns();
+        // Delimited text readers are column-pruned but not lazy materialized: all file-local
+        // columns requested by TableReader are decoded before file-local conjuncts are evaluated.
+        while (*rows < batch_size && !_line_reader_eof &&
+               Block::columns_byte_size(columns) < max_block_bytes) {
+            Slice line;
+            bool line_eof = false;
+            RETURN_IF_ERROR(_read_next_line(&line, &line_eof));
+            if (line_eof) { // Input exhausted; keep the rows gathered so far.
+                break;
+            }
+            RETURN_IF_ERROR(_fill_columns_from_line(line, &columns, rows));
+        }
+    }
+
+    const size_t rows_before_filter = *rows; // Row count prior to any conjunct filtering.
+    update_counter(_text_profile.rows_read_before_filter, rows_before_filter);
+
+    MaterializedReaderFilterProfile filter_profile;
+    filter_profile.delete_conjunct_filter_time = _text_profile.delete_conjunct_filter_time;
+    filter_profile.conjunct_filter_time = _text_profile.conjunct_filter_time;
+    filter_profile.rows_filtered_by_delete_conjunct =
+            _text_profile.rows_filtered_by_delete_conjunct;
+    filter_profile.rows_filtered_by_conjunct = _text_profile.rows_filtered_by_conjunct;
+    RETURN_IF_ERROR(apply_materialized_reader_filters(_request.get(), _io_ctx.get(), file_block,
+                                                      rows, &filter_profile));
+    update_counter(_text_profile.rows_returned, *rows);
+    _reader_statistics.read_rows += *rows;
+    *eof = _line_reader_eof && *rows == 0; // EOF is only reported together with an empty batch.
+    _eof = *eof;
+    return Status::OK();
+}
+
+Status DelimitedTextReader::get_aggregate_result(const FileAggregateRequest& request,
+                                                 FileAggregateResult* result) {
+    DORIS_CHECK(result != nullptr);
+    if (request.agg_type != TPushAggOp::type::COUNT) {
+        return Status::NotSupported("{} v2 reader only supports COUNT aggregate pushdown",
+                                    _reader_name);
+    }
+    if (_line_reader == nullptr) {
+        return Status::InternalError("{} v2 reader is not open", _reader_name);
+    }
+    // Count by scanning: consume every remaining line without materializing any column.
+    int64_t count = 0;
+    while (!_line_reader_eof) {
+        Slice line;
+        bool line_eof = false;
+        RETURN_IF_ERROR(_read_next_line(&line, &line_eof));
+        if (line_eof) {
+            break;
+        }
+        if (line.size == 0) {
+            update_counter(_text_profile.empty_lines_read, 1);
+            if (_empty_line_as_record() ||
+                (_runtime_state != nullptr && _runtime_state->is_read_csv_empty_line_as_null())) {
+                ++count;
+            }
+            continue;
+        }
+        RETURN_IF_ERROR(_validate_line(line));
+        ++count;
+    }
+    result->count = count;
+    result->columns.clear();
+    update_counter(_text_profile.rows_read_before_filter, count);
+    update_counter(_text_profile.rows_returned, count);
+    _reader_statistics.read_rows += count;
+    _eof = true; // the loop above drained the line reader
+    return Status::OK();
+}
+
+Status DelimitedTextReader::close() {
+    if (_line_reader != nullptr) {
+        _line_reader->close();
+        _line_reader.reset();
+    }
+    _decompressor.reset();
+    _file_reader.reset();
+    _tracing_file_reader.reset();
+    _requested_columns.clear();
+    return Status::OK();
+}
+
+bool DelimitedTextReader::_is_null_format(Slice value) const {
+    // A field is NULL only when it matches null_format byte for byte. Lengths are compared
+    // first; an empty null_format then matches without calling memcmp on its pointer.
+    const bool same_length = value.size == _options.null_len;
+    if (!same_length || _options.null_len == 0) {
+        return same_length;
+    }
+    return std::memcmp(value.data, _options.null_format, value.size) == 0;
+}
+
+Status DelimitedTextReader::_build_requested_columns(const FileScanRequest& request,
+                                                     std::vector<RequestedColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+
+    // `request.local_positions` is keyed by FileReader schema local id. For delimited text readers
+    // that local id is the field ordinal from column_idxs, so reverse-map it to the source slot
+    // descriptor before choosing the serde.
+    std::vector<RequestedColumn> by_position(request.local_positions.size());
+    for (const auto& [file_column_id, block_position] : request.local_positions) {
+        const auto source_it = std::find(_source_column_idxs.begin(), _source_column_idxs.end(),
+                                         file_column_id.value());
+        if (source_it == _source_column_idxs.end()) {
+            return Status::InvalidArgument("{} v2 request references unknown local column id {}",
+                                           _reader_name, file_column_id.value());
+        }
+        const auto source_index = std::distance(_source_column_idxs.begin(), source_it);
+        DORIS_CHECK(source_index >= 0 &&
+                    static_cast<size_t>(source_index) < _source_file_slot_descs.size());
+        if (block_position.value() >= by_position.size()) {
+            return Status::InvalidArgument("{} v2 request has invalid block position {}",
+                                           _reader_name, block_position.value());
+        }
+        const auto* slot = _source_file_slot_descs[source_index];
+        const auto type = slot->get_data_type_ptr();
+        RequestedColumn requested_column;
+        requested_column.file_column_id = file_column_id;
+        requested_column.block_position = block_position;
+        requested_column.slot_desc = slot;
+        requested_column.serde = _source_serdes[source_index];
+        requested_column.nullable_string_fast_path =
+                type->is_nullable() && is_string_type(type->get_primitive_type());
+        by_position[block_position.value()] = std::move(requested_column);
+    }
+    // A slot left at the invalid default id means no request entry targeted that position.
+    for (size_t i = 0; i < by_position.size(); ++i) {
+        if (!by_position[i].file_column_id.is_valid()) {
+            return Status::InvalidArgument("{} v2 request misses block position {}", _reader_name,
+                                           i);
+        }
+    }
+    *columns = std::move(by_position);
+    return Status::OK();
+}
+
+Status DelimitedTextReader::_open_file() {
+    _start_offset = _file_description->range_start_offset;
+    _size = _file_description->range_size;
+    // Some callers, especially stream-load/http_stream, do not know the total length up front.
+    // For a first split this is fine: NewPlainTextLineReader can read until the underlying reader
+    // returns EOF. For non-first splits we still need a concrete range so the pre-read/skip-one-line
+    // boundary logic does not read an unbounded stream.
+    if (_size <= 0 && _file_description->file_size >= 0) {
+        _size = _file_description->file_size - _start_offset;
+    }
+    if (_size < 0 && _start_offset > 0) {
+        return Status::InvalidArgument("{} v2 reader requires a valid split size for {}",
+                                       _reader_name, _file_description->path);
+    }
+    _skip_lines = 0;
+    if (_start_offset == 0) {
+        if (_scan_params->file_attributes.__isset.header_type &&
+            !_scan_params->file_attributes.header_type.empty()) {
+            const auto header_type = to_lower(_scan_params->file_attributes.header_type);
+            if (header_type == BeConsts::CSV_WITH_NAMES) {
+                _skip_lines = 1;
+            } else if (header_type == BeConsts::CSV_WITH_NAMES_AND_TYPES) {
+                _skip_lines = 2;
+            }
+        } else if (_scan_params->file_attributes.__isset.skip_lines) {
+            _skip_lines = _scan_params->file_attributes.skip_lines;
+        }
+    } else {
+        if (!_can_split()) {
+            return Status::InternalError<false>("For now we do not support split compressed file");
+        }
+        // Non-first splits normally start in the middle of a record. Move the start back by at
+        // most one line delimiter, then skip one line in `_read_next_line()`, so the first
+        // returned row is always complete. Example with '\n':
+        //   file bytes:  "1,a\n2,b\n"
+        //   split start:     ^
+        //   pre-read:       ^
+        //   skipped line: "a"
+        //   returned row: "2,b"
+        const int64_t pre_read_len =
+                std::min(static_cast<int64_t>(_line_delimiter.size()), _start_offset);
+        _start_offset -= pre_read_len;
+        _size += pre_read_len;
+        _skip_lines = 1;
+    }
+
+    if (_scan_params->file_type == TFileType::FILE_STREAM) {
+        if (!_stream_load_id.has_value()) {
+            return Status::InvalidArgument("{} v2 stream reader requires load id", _reader_name);
+        }
+        // Stream load/http_stream data lives in NewLoadStreamMgr rather than a filesystem. The
+        // generic FileFactory path only supports real file systems, so FILE_STREAM must use the
+        // same pipe-reader lookup as the old CSV reader.
+        RETURN_IF_ERROR(FileFactory::create_pipe_reader(*_stream_load_id, &_file_reader,
+                                                        _runtime_state, /*need_schema=*/false));
+    } else {
+        auto reader_options = FileFactory::get_reader_options(_runtime_state->query_options(),
+                                                              *_file_description);
+        auto file_reader = DORIS_TRY(FileFactory::create_file_reader(
+                *_system_properties, *_file_description, reader_options, _profile));
+        _file_reader = _io_ctx && _io_ctx->file_reader_stats
+                               ? std::make_shared<io::TracingFileReader>(std::move(file_reader),
+                                                                         _io_ctx->file_reader_stats)
+                               : file_reader;
+    }
+    if (_file_reader->size() == 0 && _scan_params->file_type != TFileType::FILE_STREAM &&
+        _scan_params->file_type != TFileType::FILE_BROKER) {
+        return Status::EndOfFile("init reader failed, empty {} file: {}", _reader_name,
+                                 _file_description->path);
+    }
+    return Status::OK();
+}
+
+Status DelimitedTextReader::_read_next_line(Slice* line, bool* eof) {
+    DORIS_CHECK(line != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    while (true) {
+        const uint8_t* ptr = nullptr;
+        size_t size = 0;
+        {
+            SCOPED_TIMER(_text_profile.read_line_time);
+            RETURN_IF_ERROR(_line_reader->read_line(&ptr, &size, &_line_reader_eof, _io_ctx.get()));
+        }
+        if (_line_reader_eof && size == 0) {
+            *eof = true;
+            return Status::OK();
+        }
+        if (_skip_lines == 0 && !_bom_removed) {
+            // BOM is stripped only from the first logical data line. Header lines are skipped
+            // before this branch, so a BOM inside a skipped header does not leak into user data.
+            ptr = _remove_bom(ptr, &size);
+            _bom_removed = true;
+        }
+        if (_skip_lines > 0) {
+            --_skip_lines;
+            _bom_removed = true; // a BOM is only looked for when no line was skipped
+            update_counter(_text_profile.skipped_lines, 1);
+            continue;
+        }
+        *line = Slice(ptr, size);
+        *eof = false;
+        update_counter(_text_profile.raw_lines_read, 1);
+        return Status::OK();
+    }
+}
+
+Status DelimitedTextReader::_fill_columns_from_line(const Slice& line,
+                                                    std::vector<MutableColumnPtr>* columns,
+                                                    size_t* rows) {
+    DORIS_CHECK(columns != nullptr);
+    if (line.size == 0) {
+        update_counter(_text_profile.empty_lines_read, 1);
+        if (!_empty_line_as_record()) {
+            if (_runtime_state != nullptr && _runtime_state->is_read_csv_empty_line_as_null()) {
+                for (const auto& column : _requested_columns) {
+                    RETURN_IF_ERROR(_append_null((*columns)[column.block_position.value()].get()));
+                    update_counter(_text_profile.cells_deserialized, 1);
+                }
+                ++(*rows);
+            }
+            return Status::OK(); // without the all-NULL row above, the empty line is dropped
+        }
+    }
+    RETURN_IF_ERROR(_validate_line(line));
+    // Split into _split_values, then decode only the requested field ordinals.
+    {
+        SCOPED_TIMER(_text_profile.split_line_time);
+        _split_line(line);
+    }
+    SCOPED_TIMER(_text_profile.deserialize_time);
+    for (const auto& column : _requested_columns) {
+        auto* output = (*columns)[column.block_position.value()].get();
+        const int32_t field_index = column.file_column_id.value();
+        // Missing trailing fields are query-compatible with the old readers: they become NULL
+        // rather than shifting subsequent projected columns or rejecting the row.
+        Slice value = field_index >= 0 && static_cast<size_t>(field_index) < _split_values.size()
+                              ? _split_values[field_index]
+                              : Slice(_options.null_format, _options.null_len);
+        RETURN_IF_ERROR(_deserialize_one_cell(column, output, _normalize_value(value)));
+        update_counter(_text_profile.cells_deserialized, 1);
+    }
+    ++(*rows);
+    return Status::OK();
+}
+
+Status DelimitedTextReader::_validate_line(const Slice& line) {
+    (void)line;
+    return Status::OK();
+}
+
+Slice DelimitedTextReader::_normalize_value(Slice value) const {
+    return value;
+}
+
+bool DelimitedTextReader::_empty_line_as_record() const {
+    return false;
+}
+
+bool DelimitedTextReader::_can_split() const {
+    return _file_compress_type == TFileCompressType::PLAIN;
+}
+
+Status DelimitedTextReader::_append_null(IColumn* output) {
+    DORIS_CHECK(output != nullptr);
+    auto* nullable = assert_cast<ColumnNullable*>(output); // output must be a ColumnNullable
+    nullable->insert_data(nullptr, 0);
+    return Status::OK();
+}
+
+const uint8_t* DelimitedTextReader::_remove_bom(const uint8_t* ptr, size_t* size) {
+    DORIS_CHECK(size != nullptr);
+    constexpr size_t bom_len = 3; // UTF-8 byte order mark: EF BB BF
+    if (ptr == nullptr || *size < bom_len || std::memcmp(ptr, "\xEF\xBB\xBF", bom_len) != 0) {
+        return ptr;
+    }
+    *size -= bom_len;
+    return ptr + bom_len;
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/delimited_text/delimited_text_reader.h b/be/src/format_v2/delimited_text/delimited_text_reader.h
new file mode 100644
index 00000000000000..06cb93dd7f7b65
--- /dev/null
+++ b/be/src/format_v2/delimited_text/delimited_text_reader.h
@@ -0,0 +1,176 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "core/data_type_serde/data_type_serde.h"
+#include "format_v2/file_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_profile.h"
+#include "util/slice.h"
+
+namespace doris {
+class Decompressor;
+class LineReader;
+class SlotDescriptor;
+} // namespace doris
+
+namespace doris::format {
+
+// Shared FileReader implementation for delimited text-like formats in FileScannerV2.
+//
+// CSV and Hive text have different row parsing and cell serde rules, but their v2 FileReader
+// control flow is the same: expose a file-local schema from FE slot descriptors, resolve
+// FileScanRequest local positions, read physical lines, materialize requested columns, apply
+// file-local conjuncts, and optionally count rows by scanning. This base keeps that contract in one
+// place while derived readers provide only format-specific hooks.
+class DelimitedTextReader : public FileReader {
+public:
+    ~DelimitedTextReader() override;
+
+    Status init(RuntimeState* state) override;
+    Status get_schema(std::vector<ColumnDefinition>* file_schema) const override;
+    std::unique_ptr<TableColumnMapper> create_column_mapper(
+            TableColumnMapperOptions options) const override;
+    Status open(std::shared_ptr<FileScanRequest> request) override;
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override;
+    Status get_aggregate_result(const FileAggregateRequest& request,
+                                FileAggregateResult* result) override;
+    Status close() override;
+
+protected:
+    struct DelimitedTextProfile {
+        RuntimeProfile::Counter* open_file_time = nullptr;
+        RuntimeProfile::Counter* create_line_reader_time = nullptr;
+        RuntimeProfile::Counter* read_line_time = nullptr;
+        RuntimeProfile::Counter* split_line_time = nullptr;
+        RuntimeProfile::Counter* deserialize_time = nullptr;
+        RuntimeProfile::Counter* conjunct_filter_time = nullptr;
+        RuntimeProfile::Counter* delete_conjunct_filter_time = nullptr;
+        RuntimeProfile::Counter* raw_lines_read = nullptr;
+        RuntimeProfile::Counter* rows_read_before_filter = nullptr;
+        RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr;
+        RuntimeProfile::Counter* rows_filtered_by_delete_conjunct = nullptr;
+        RuntimeProfile::Counter* rows_returned = nullptr;
+        RuntimeProfile::Counter* empty_lines_read = nullptr;
+        RuntimeProfile::Counter* skipped_lines = nullptr;
+        RuntimeProfile::Counter* cells_deserialized = nullptr;
+    };
+
+    struct RequestedColumn {
+        LocalColumnId file_column_id = LocalColumnId::invalid(); // field ordinal within a line
+        LocalIndex block_position;                               // index in the output block
+        const SlotDescriptor* slot_desc = nullptr;
+        DataTypeSerDeSPtr serde;
+        bool nullable_string_fast_path = false; // slot type is a nullable string
+    };
+
+    DelimitedTextReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                        std::unique_ptr<io::FileDescription>& file_description,
+                        std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                        const TFileScanRangeParams* scan_params,
+                        const std::vector<SlotDescriptor*>& file_slot_descs,
+                        TFileCompressType::type range_compress_type,
+                        std::optional<TUniqueId> stream_load_id, std::string reader_name);
+
+    // Initialize format-specific options after the common init path has validated scan params and
+    // runtime state. Implementations must fill `_value_separator`, `_line_delimiter`,
+    // `_file_compress_type`, `_options`, and any parser-specific state before the common schema
+    // construction reads column_idxs.
+    virtual Status _init_format_state() = 0;
+    // Create the decompressor used by the line reader. CSV may infer compression from the file
+    // format enum, while Hive text uses only the explicit compress_type.
+    virtual Status _create_decompressor() = 0;
+    // Create the physical line reader. Implementations choose plain/enclosed/binary line contexts,
+    // but must store the result in `_line_reader` for the common get_block/count paths.
+    virtual Status _create_line_reader() = 0;
+    // Validate one logical line before splitting. CSV enforces UTF-8 for query reads; Hive text
+    // deliberately accepts arbitrary bytes and uses the default OK implementation.
+    virtual Status _validate_line(const Slice& line);
+    // Split one logical line into `_split_values`. The common materialization path then resolves
+    // requested field ordinals against `_split_values`.
+    virtual void _split_line(const Slice& line) = 0;
+    // Deserialize a single normalized field into the requested output column using the
+    // format-specific serde API.
+    virtual Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                         Slice value) = 0;
+    // Let formats rewrite a raw field before serde. CSV uses this for empty_field_as_null; Hive
+    // text keeps the raw field because empty string and NULL are distinct unless null_format
+    // matches exactly.
+    virtual Slice _normalize_value(Slice value) const;
+    // Whether an empty physical line is one logical record. CSV keeps the existing default
+    // skip behavior, while Hive TEXTFILE treats an empty line as a record with one empty field.
+    virtual bool _empty_line_as_record() const;
+    // Whether this file can start at a non-zero split offset. Compressed delimited files cannot be
+    // split because the decompressor needs the stream from the beginning.
+    virtual bool _can_split() const;
+
+    Status _append_null(IColumn* output);
+    // Match the generic nullable serde semantics exactly: a field is NULL when its raw slice is
+    // byte-for-byte equal to null_format. This also covers Hive tables that set
+    // serialization.null.format to the empty string.
+    bool _is_null_format(Slice value) const;
+    const uint8_t* _remove_bom(const uint8_t* ptr, size_t* size);
+    void _init_profile() override;
+
+    const TFileScanRangeParams* _scan_params = nullptr;
+    std::vector<SlotDescriptor*> _source_file_slot_descs;
+    std::vector<int32_t> _source_column_idxs;
+    DataTypeSerDeSPtrs _source_serdes;
+    std::vector<ColumnDefinition> _file_schema;
+    RuntimeState* _runtime_state = nullptr;
+
+    std::vector<RequestedColumn> _requested_columns;
+    std::unique_ptr<Decompressor> _decompressor;
+    std::unique_ptr<LineReader> _line_reader;
+    std::vector<Slice> _split_values;
+    DataTypeSerDe::FormatOptions _options;
+
+    std::string _value_separator;
+    std::string _line_delimiter;
+    TFileCompressType::type _file_compress_type = TFileCompressType::UNKNOWN;
+    TFileCompressType::type _range_compress_type = TFileCompressType::UNKNOWN;
+    std::optional<TUniqueId> _stream_load_id;
+    int64_t _start_offset = 0;
+    int64_t _size = -1;
+    int _skip_lines = 0; // lines _read_next_line() still has to discard
+    char _escape = 0;    // 0 disables escape handling
+    bool _line_reader_eof = false;
+    bool _bom_removed = false;
+    // FE exposes this as an optional text-file attribute. Keep the default strict so missing thrift
+    // fields do not accidentally accept arbitrary bytes; CSV can still opt out through the session
+    // variable or TVF/file-format property `enable_text_validate_utf8=false`.
+    bool _enable_text_validate_utf8 = true;
+    DelimitedTextProfile _text_profile;
+
+private:
+    Status _build_requested_columns(const FileScanRequest& request,
+                                    std::vector<RequestedColumn>* columns) const;
+    Status _open_file();
+    Status _read_next_line(Slice* line, bool* eof);
+    Status _fill_columns_from_line(const Slice& line, std::vector<MutableColumnPtr>* columns,
+                                   size_t* rows);
+
+    std::string _reader_name;
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/delimited_text/text_reader.cpp b/be/src/format_v2/delimited_text/text_reader.cpp
new file mode 100644
index 00000000000000..930052a14f1229
--- /dev/null
+++ b/be/src/format_v2/delimited_text/text_reader.cpp
@@ -0,0 +1,164 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/text_reader.h"
+
+#include <cstring>
+#include <utility>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type_serde/data_type_string_serde.h"
+#include "format/file_reader/new_plain_text_line_reader.h"
+#include "runtime/descriptors.h"
+#include "util/decompressor.h"
+
+namespace doris::format::text {
+namespace {
+
+bool starts_with_at(const Slice& line, size_t pos, const std::string& needle) {
+    return !needle.empty() && pos + needle.size() <= line.size &&
+           std::memcmp(line.data + pos, needle.data(), needle.size()) == 0;
+}
+
+} // namespace
+
+TextReader::TextReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                       std::unique_ptr<io::FileDescription>& file_description,
+                       std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                       const TFileScanRangeParams* scan_params,
+                       const std::vector<SlotDescriptor*>& file_slot_descs,
+                       TFileCompressType::type range_compress_type,
+                       std::optional<TUniqueId> stream_load_id)
+        : DelimitedTextReader(system_properties, file_description, std::move(io_ctx), profile,
+                              scan_params, file_slot_descs, range_compress_type,
+                              std::move(stream_load_id), "Text") {}
+
+TextReader::~TextReader() = default;
+
+Status TextReader::_init_format_state() {
+    _file_compress_type =
+            _range_compress_type != TFileCompressType::UNKNOWN
+                    ? _range_compress_type
+                    : (_scan_params->__isset.compress_type ? _scan_params->compress_type
+                                                           : TFileCompressType::PLAIN);
+
+    const auto& text_params = _scan_params->file_attributes.text_params;
+    _value_separator = text_params.column_separator;
+    _line_delimiter = text_params.line_delimiter;
+    if (text_params.__isset.escape) {
+        _escape = text_params.escape;
+    }
+    _options.escape_char = _escape; // stays 0 when no escape character is configured
+    _options.collection_delim =
+            text_params.collection_delimiter.empty() ? ',' : text_params.collection_delimiter[0];
+    _options.map_key_delim =
+            text_params.mapkv_delimiter.empty() ? ':' : text_params.mapkv_delimiter[0];
+    if (text_params.__isset.null_format) {
+        _options.null_format = text_params.null_format.data(); // points into *_scan_params
+        _options.null_len = text_params.null_format.length();
+    }
+    return Status::OK();
+}
+
+Status TextReader::_create_decompressor() {
+    return Decompressor::create_decompressor(_file_compress_type, &_decompressor);
+}
+
+Status TextReader::_create_line_reader() {
+    auto text_line_reader_ctx = std::make_shared<PlainTextLineReaderCtx>(
+            _line_delimiter, _line_delimiter.size(), false);
+    _line_reader = NewPlainTextLineReader::create_unique(
+            _profile, _file_reader, _decompressor.get(), std::move(text_line_reader_ctx), _size,
+            _start_offset);
+    return Status::OK();
+}
+
+void TextReader::_split_line(const Slice& line) {
+    _split_values.clear();
+    if (_value_separator.size() == 1) {
+        _split_line_single_char(line);
+    } else {
+        _split_line_multi_char(line);
+    }
+}
+
+void TextReader::_split_line_single_char(const Slice& line) {
+    // Mirror Hive's LazyStruct scan: an escape byte consumes the byte after it, so "\\" right
+    // before a separator no longer hides that separator. Escape bytes stay in the field slice
+    // so deserialize_one_cell_from_hive_text() can unescape the final value.
+    size_t value_start = 0;
+    for (size_t i = 0; i < line.size; ++i) {
+        if (line.data[i] == _value_separator[0]) {
+            _split_values.emplace_back(line.data + value_start, i - value_start);
+            value_start = i + 1;
+        } else if (_escape != 0 && line.data[i] == _escape) {
+            ++i; // the escaped byte can never act as a separator
+        }
+    }
+    _split_values.emplace_back(line.data + value_start, line.size - value_start);
+}
+
+void TextReader::_split_line_multi_char(const Slice& line) {
+    // Same escape rule as the single-byte splitter: the byte following an escape byte is
+    // skipped, so only an odd run of escape bytes protects the separator that follows it.
+    size_t value_start = 0;
+    size_t i = 0;
+    while (i < line.size) {
+        if (starts_with_at(line, i, _value_separator)) {
+            _split_values.emplace_back(line.data + value_start, i - value_start);
+            i += _value_separator.size();
+            value_start = i;
+        } else if (_escape != 0 && line.data[i] == _escape) {
+            i += 2; // step over the escape byte and the byte it protects
+        } else {
+            ++i;
+        }
+    }
+    _split_values.emplace_back(line.data + value_start, line.size - value_start);
+}
+
+Status TextReader::_deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                         Slice value) {
+    DORIS_CHECK(output != nullptr);
+    if (column.nullable_string_fast_path) {
+        auto& null_column = assert_cast<ColumnNullable&>(*output);
+        if (_is_null_format(value)) {
+            null_column.insert_data(nullptr, 0);
+            return Status::OK();
+        }
+        static DataTypeStringSerDe string_serde(TYPE_STRING);
+        auto status = string_serde.deserialize_one_cell_from_hive_text(
+                null_column.get_nested_column(), value, _options);
+        if (!status.ok()) {
+            null_column.insert_data(nullptr, 0); // a cell that fails to parse becomes NULL
+            return Status::OK();
+        }
+        null_column.get_null_map_data().push_back(0); // flag for the value just appended
+        return Status::OK();
+    }
+    return column.serde->deserialize_one_cell_from_hive_text(*output, value, _options);
+}
+
+bool TextReader::_empty_line_as_record() const {
+    // Hive TEXTFILE treats an empty physical line as a record. The splitter maps it
+    // to one empty field and missing trailing fields are filled with null_format.
+    return true;
+}
+
+} // namespace doris::format::text
diff --git a/be/src/format_v2/delimited_text/text_reader.h b/be/src/format_v2/delimited_text/text_reader.h
new file mode 100644
index 00000000000000..8efbfe359c7e64
--- /dev/null
+++ b/be/src/format_v2/delimited_text/text_reader.h
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+
+#include "format_v2/delimited_text/delimited_text_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "util/slice.h"
+
+namespace doris {
+class SlotDescriptor;
+} // namespace doris
+
+namespace doris::format::text {
+
+// FileScannerV2 Hive text reader.
+//
+// Text files do not have embedded schema, so FE-provided file slots and column_idxs are converted
+// into a file-local schema in the same way as CSV v2. The row parser is intentionally different
+// from CSV: field splitting follows Hive text escaping rules and cells are deserialized through
+// deserialize_one_cell_from_hive_text().
+class TextReader final : public ::doris::format::DelimitedTextReader {
+public:
+    TextReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+               std::unique_ptr<io::FileDescription>& file_description,
+               std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+               const TFileScanRangeParams* scan_params,
+               const std::vector<SlotDescriptor*>& file_slot_descs,
+               TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN,
+               std::optional<TUniqueId> stream_load_id = std::nullopt);
+    ~TextReader() override;
+
+private:
+    Status _init_format_state() override;
+    Status _create_decompressor() override;
+    Status _create_line_reader() override;
+    void _split_line(const Slice& line) override;
+    void _split_line_single_char(const Slice& line);
+    void _split_line_multi_char(const Slice& line);
+    Status _deserialize_one_cell(const RequestedColumn& column, IColumn* output,
+                                 Slice value) override;
+    bool _empty_line_as_record() const override;
+};
+
+} // namespace doris::format::text
diff --git a/be/src/format_v2/expr/cast.cpp b/be/src/format_v2/expr/cast.cpp
new file mode 100644
index 00000000000000..efeb9d851deb22
--- /dev/null
+++ b/be/src/format_v2/expr/cast.cpp
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/cast.h"
+
+#include <fmt/format.h>
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+
+#include <ostream>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/block/columns_with_type_and_name.h"
+#include "exprs/function/simple_function_factory.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vliteral.h"
+
+namespace doris::format {
+
+// Resolves the CAST function from the child's type to `_data_type`; InternalError if impossible.
+Status Cast::prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    if (_children.size() != 1) {
+        return Status::InternalError(
+                fmt::format("Cast should have exactly 1 child expr, but got {}", _children.size()));
+    }
+    const auto& child = _children[0];
+    ColumnPtr literal_column; // stays null for non-literal children
+    if (child->is_literal()) { // some functions derive the return type from the literal column
+        auto literal_node = std::dynamic_pointer_cast<VLiteral>(child);
+        if (literal_node == nullptr) { // claimed literal but not a VLiteral: fail, do not crash
+            return Status::InternalError("Cast literal child is not a VLiteral");
+        }
+        literal_column = literal_node->get_column_ptr();
+    }
+    ColumnsWithTypeAndName argument_template;
+    argument_template.emplace_back(literal_column, child->data_type(), child->expr_name());
+    _expr_name = fmt::format("CAST(arguments={},return={})", child->data_type()->get_name(),
+                             _data_type->get_name());
+    // Only look the function up here; the function itself is not prepared.
+    _function = SimpleFunctionFactory::instance().get_function(
+            "CAST", argument_template, _data_type,
+            {.new_version_unix_timestamp = state->query_options().new_version_unix_timestamp},
+            state->be_exec_version());
+    if (_function == nullptr) {
+        return Status::InternalError("Could not find function {} ", _expr_name);
+    }
+    VExpr::register_function_context(state, context);
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status Cast::open(RuntimeState* state, VExprContext* context,
+                  FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished); // prepare() has to run before open()
+    for (const auto& child : _children) {
+        RETURN_IF_ERROR(child->open(state, context, scope));
+    }
+    RETURN_IF_ERROR(VExpr::init_function_context(state, context, scope, _function));
+    if (FunctionContext::FRAGMENT_LOCAL == scope) { // constant result is consumed by _do_execute
+        RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
+    }
+    _open_finished = true;
+    return Status::OK();
+}
+
+void Cast::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
+    VExpr::close_function_context(context, scope, _function); // function context first
+    VExpr::close(context, scope); // then the base expr
+}
+
+Status Cast::execute_column_impl(VExprContext* context, const Block* block,
+                                 const Selector* selector, size_t count,
+                                 ColumnPtr& result_column) const {
+    return _do_execute(context, block, selector, count, result_column); // plain forwarder
+}
+
+std::string Cast::debug_string() const {
+    return _expr_name; // "CAST(arguments=...,return=...)" once prepare() has run
+}
+
+// Runs the resolved CAST function on the child's output; a constant already evaluated in open()
+// is returned from the cached constant instead.
+Status Cast::_do_execute(VExprContext* context, const Block* block, const Selector* selector,
+                         size_t count, ColumnPtr& result_column) const {
+    DCHECK(_open_finished || block == nullptr) << debug_string();
+    if (_children.size() != 1) {
+        return Status::InternalError(
+                fmt::format("Cast should have exactly 1 child expr, but got {}", _children.size()));
+    }
+    if (is_const_and_have_executed()) {
+        result_column = get_result_from_const(count);
+        return Status::OK();
+    }
+
+    const auto& child = _children[0];
+    ColumnPtr input_column;
+    RETURN_IF_ERROR(child->execute_column(context, block, selector, count, input_column));
+
+    // Scratch block layout: position 0 holds the argument, the next position takes the result.
+    Block scratch;
+    scratch.insert({input_column, child->execute_type(block), child->expr_name()});
+    ColumnNumbers arg_positions(1, 0);
+    uint32_t result_position = scratch.columns();
+    scratch.insert({nullptr, _data_type, _expr_name});
+
+    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), scratch,
+                                       arg_positions, result_position, count));
+    result_column = scratch.get_by_position(result_position).column;
+    DCHECK_EQ(result_column->size(), count);
+    RETURN_IF_ERROR(result_column->column_self_check());
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/cast.h b/be/src/format_v2/expr/cast.h
new file mode 100644
index 00000000000000..1dc06bcf07f2bc
--- /dev/null
+++ b/be/src/format_v2/expr/cast.h
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+#include "common/object_pool.h"
+#include "common/status.h"
+#include "exprs/function_context.h"
+#include "exprs/vexpr.h"
+
+namespace doris {
+class RowDescriptor;
+class RuntimeState;
+class TExprNode;
+class Block;
+class VExprContext;
+} // namespace doris
+
+namespace doris::format {
+
+class Cast final : public VExpr {
+    ENABLE_FACTORY_CREATOR(Cast);
+
+public:
+    explicit Cast(const DataTypePtr& type) { // `type` becomes this expr's result type
+        _node_type = TExprNodeType::CAST_EXPR;
+        _opcode = TExprOpcode::CAST;
+        _data_type = type;
+    }
+    ~Cast() override = default;
+    Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
+    Status open(RuntimeState* state, VExprContext* context,
+                FunctionContext::FunctionStateScope scope) override;
+    void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override;
+    std::string debug_string() const override;
+    uint64_t get_digest(uint64_t seed) const override { return 0; } // constant; `seed` is unused
+    const std::string& expr_name() const override { return _expr_name; }
+    Status clone_node(VExprSPtr* cloned_expr) const override { // copies the type, not children
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = Cast::create_shared(_data_type);
+        return Status::OK();
+    }
+
+private:
+    Status _do_execute(VExprContext* context, const Block* block, const Selector* selector,
+                       size_t count, ColumnPtr& result_column) const;
+    std::string _expr_name; // set by prepare()
+    FunctionBasePtr _function; // CAST implementation resolved by prepare()
+};
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/delete_predicate.cpp b/be/src/format_v2/expr/delete_predicate.cpp
new file mode 100644
index 00000000000000..9ab1090247c15a
--- /dev/null
+++ b/be/src/format_v2/expr/delete_predicate.cpp
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/delete_predicate.h"
+
+#include <fmt/format.h>
+#include <gen_cpp/Exprs_types.h>
+#include <glog/logging.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <ostream>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/block/column_numbers.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/block/columns_with_type_and_name.h"
+
+namespace doris::format {
+
+DeletePredicate::DeletePredicate(const std::vector<int64_t>& deleted_rows)
+        : VExpr(), _deleted_rows(deleted_rows) { // reference member: vector must outlive `this`
+    _node_type = TExprNodeType::PREDICATE;
+    _opcode = TExprOpcode::DELETE;
+    _data_type = std::make_shared<DataTypeBool>(); // result is one Bool flag per row
+}
+
+Status DeletePredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
+                                VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    _expr_name = "DeletePredicate"; // also the name of the result column added by execute()
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status DeletePredicate::open(RuntimeState* state, VExprContext* context,
+                             FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    RETURN_IF_ERROR(VExpr::open(state, context, scope)); // *_OR_PREPARED belongs in prepare()
+    _open_finished = true;
+    return Status::OK();
+}
+
+void DeletePredicate::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
+    VExpr::close(context, scope); // nothing of its own to release here; defers to the base
+}
+
+/**
+ * DeletePredicate is derived from 2 cases:
+ * 1. A list of row IDs of the deleted rows (e.g. delete rows with row_id in (1, 2, 3)).
+ * 2. A bit vector telling whether each row is deleted (e.g. [0,1,0,0,1] deletes rows 1 and 4).
+ *
+ * So DeletePredicate must have exactly 1 child expr: the row id slot. Row IDs should be generated
+ * by the file reader as a virtual Int64 column in `block`.
+ *
+ * Appends a Bool column (true = deleted) to `block`; its position goes to `result_column_id`.
+ * Both `_deleted_rows` and the row ids of the block are binary-searched, so both are expected to
+ * be sorted in ascending order.
+ **/
+Status DeletePredicate::execute(VExprContext* context, Block* block, int* result_column_id) const {
+    if (_children.size() != 1) {
+        return Status::InternalError(fmt::format(
+                "DeletePredicate should have exactly 1 child expr, but got {}", _children.size()));
+    }
+    int col_id = -1;
+    RETURN_IF_ERROR(_children[0]->execute(context, block, &col_id));
+    if (col_id < 0 || static_cast<size_t>(col_id) >= block->columns()) {
+        return Status::InternalError(
+                "DeletePredicate row id child returned invalid column id {}, block has {} columns",
+                col_id, block->columns());
+    }
+    const auto& row_ids =
+            assert_cast<const ColumnInt64&>(*block->get_by_position(col_id).column).get_data();
+    const auto count = row_ids.size();
+    auto res_col = ColumnBool::create(count, 0);
+    // Appends the flag column and reports where it landed; shared by both exits below.
+    const auto emit_result = [&]() {
+        block->insert({std::move(res_col), std::make_shared<DataTypeBool>(), expr_name()});
+        *result_column_id = static_cast<int>(block->get_columns().size() - 1);
+        return Status::OK();
+    };
+    if (_deleted_rows.empty() || count == 0) {
+        return emit_result(); // nothing can match, every flag stays 0
+    }
+
+    // Clip the delete list to the id range [row_ids[0], row_ids[count - 1]] seen in this block.
+    const int64_t* const deleted_begin = _deleted_rows.data();
+    const int64_t* const deleted_end = deleted_begin + _deleted_rows.size();
+    const int64_t* const first = std::lower_bound(deleted_begin, deleted_end, row_ids[0]);
+    const int64_t* const last = std::upper_bound(first, deleted_end, row_ids[count - 1]);
+    auto& flags = res_col->get_data();
+    for (const int64_t* cursor = first; cursor != last; ++cursor) {
+        // An id inside the clipped range may still be missing from the block, hence the check.
+        const auto hit = std::ranges::lower_bound(row_ids, *cursor);
+        if (hit != row_ids.end() && *hit == *cursor) {
+            const size_t index = hit - row_ids.begin();
+            flags[index] = true;
+        }
+    }
+    return emit_result();
+}
+
+std::string DeletePredicate::debug_string() const {
+    return _expr_name; // "DeletePredicate" once prepare() has run, empty before that
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/delete_predicate.h b/be/src/format_v2/expr/delete_predicate.h
new file mode 100644
index 00000000000000..dce2de3edf278e
--- /dev/null
+++ b/be/src/format_v2/expr/delete_predicate.h
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <string>
+
+#include "common/object_pool.h"
+#include "common/status.h"
+#include "exprs/function_context.h"
+#include "exprs/vexpr.h"
+
+namespace doris {
+class RowDescriptor;
+class RuntimeState;
+class TExprNode;
+class Block;
+class VExprContext;
+} // namespace doris
+
+namespace doris::format {
+
+class DeletePredicate final : public VExpr {
+    ENABLE_FACTORY_CREATOR(DeletePredicate);
+
+public:
+    explicit DeletePredicate(const std::vector<int64_t>& deleted_rows); // kept by reference
+    ~DeletePredicate() override = default;
+    Status execute(VExprContext* context, Block* block, int* result_column_id) const override;
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        return Status::InternalError("Not implement DeletePredicate::execute_column_impl");
+    } // only the block-based execute() above is implemented
+    Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
+    Status open(RuntimeState* state, VExprContext* context,
+                FunctionContext::FunctionStateScope scope) override;
+    void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
+    std::string debug_string() const override;
+    uint64_t get_digest(uint64_t seed) const override { return 0; } // constant; `seed` is unused
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    std::string _expr_name;
+    const std::vector<int64_t>& _deleted_rows; // not owned: the caller must keep it alive
+};
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/equality_delete_predicate.cpp b/be/src/format_v2/expr/equality_delete_predicate.cpp
new file mode 100644
index 00000000000000..13454e3b22f116
--- /dev/null
+++ b/be/src/format_v2/expr/equality_delete_predicate.cpp
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/equality_delete_predicate.h"
+
+#include <gen_cpp/Exprs_types.h>
+
+#include <utility>
+
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/column_with_type_and_name.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+
+namespace doris::format {
+namespace {
+
+// Equality of one cell of `lhs` and one cell of `rhs` that tolerates a nullability mismatch:
+// if only one side is nullable, a NULL on that side never matches and a non-NULL value is
+// compared through the nested column.
+bool column_value_equal(const ColumnPtr& lhs, size_t lhs_row, const ColumnPtr& rhs,
+                        size_t rhs_row) {
+    if (lhs->is_nullable() == rhs->is_nullable()) { // same wrapping: compare directly
+        return lhs->compare_at(lhs_row, rhs_row, *rhs, -1) == 0;
+    }
+    if (lhs->is_nullable()) {
+        const auto& wrapped = assert_cast<const ColumnNullable&>(*lhs);
+        return !wrapped.is_null_at(lhs_row) &&
+               wrapped.get_nested_column().compare_at(lhs_row, rhs_row, *rhs, -1) == 0;
+    }
+    const auto& wrapped = assert_cast<const ColumnNullable&>(*rhs);
+    return !wrapped.is_null_at(rhs_row) &&
+           lhs->compare_at(lhs_row, rhs_row, wrapped.get_nested_column(), -1) == 0;
+}
+
+} // namespace
+
+EqualityDeletePredicate::EqualityDeletePredicate(Block delete_block, std::vector<int> field_ids)
+        : VExpr(), _delete_block(std::move(delete_block)), _field_ids(std::move(field_ids)) {
+    _expr_name = "EqualityDeletePredicate";
+    _data_type = std::make_shared<DataTypeBool>();
+    _node_type = TExprNodeType::PREDICATE;
+    _opcode = TExprOpcode::DELETE;
+    DCHECK_EQ(_delete_block.columns(), _field_ids.size()); // one delete column per field id
+    _delete_hashes = _build_hashes(_delete_block);
+    for (size_t delete_row = 0; delete_row < _delete_hashes.size(); ++delete_row) { // hash -> row
+        _delete_hash_map.emplace(_delete_hashes[delete_row], delete_row);
+    }
+}
+
+Status EqualityDeletePredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
+                                        VExprContext* context) {
+    RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context));
+    _expr_name = "EqualityDeletePredicate"; // same value the constructor already assigned
+    _prepare_finished = true;
+    return Status::OK();
+}
+
+Status EqualityDeletePredicate::open(RuntimeState* state, VExprContext* context,
+                                     FunctionContext::FunctionStateScope scope) {
+    DCHECK(_prepare_finished);
+    for (const auto& key_expr : _children) { // execute() expects one child per field id
+        RETURN_IF_ERROR(key_expr->open(state, context, scope));
+    }
+    if (FunctionContext::FRAGMENT_LOCAL == scope) {
+        RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
+    }
+    _open_finished = true;
+    return Status::OK();
+}
+
+void EqualityDeletePredicate::close(VExprContext* context,
+                                    FunctionContext::FunctionStateScope scope) {
+    VExpr::close(context, scope); // nothing of its own to release here; defers to the base
+}
+
+// Flags the rows of `block` whose key values (one child expr per field id) equal some row of
+// `_delete_block`. Appends the Bool result column; its position goes to `result_column_id`.
+Status EqualityDeletePredicate::execute(VExprContext* context, Block* block,
+                                        int* result_column_id) const {
+    if (_children.size() != _field_ids.size()) {
+        return Status::InternalError(
+                "EqualityDeletePredicate should have {} child exprs, but got {}", _field_ids.size(),
+                _children.size());
+    }
+
+    Block data_key_block;
+    for (const auto& child : _children) {
+        Block eval_block = *block; // the child is evaluated against this copy, not `block`
+        int slot = -1;
+        RETURN_IF_ERROR(child->execute(context, &eval_block, &slot));
+        if (slot < 0 || static_cast<size_t>(slot) >= eval_block.columns()) {
+            return Status::InternalError(
+                    "EqualityDeletePredicate child returned invalid column id {}", slot);
+        }
+        const auto& key_column = eval_block.get_by_position(slot);
+        data_key_block.insert({key_column.column, key_column.type, key_column.name});
+    }
+
+    const auto rows = data_key_block.rows();
+    auto res_col = ColumnBool::create(rows, 0);
+    if (!_delete_hash_map.empty() && rows != 0) {
+        const auto data_hashes = _build_hashes(data_key_block);
+        auto& result_data = res_col->get_data();
+        for (size_t row = 0; row < rows; ++row) {
+            // A hash match only nominates candidates; _equal() compares the actual values.
+            const auto [first, last] = _delete_hash_map.equal_range(data_hashes[row]);
+            for (auto it = first; it != last && !result_data[row]; ++it) {
+                result_data[row] = _equal(data_key_block, row, it->second);
+            }
+        }
+    }
+
+    block->insert({std::move(res_col), std::make_shared<DataTypeBool>(), expr_name()});
+    *result_column_id = static_cast<int>(block->columns() - 1);
+    return Status::OK();
+}
+
+std::vector<uint64_t> EqualityDeletePredicate::_build_hashes(const Block& block) {
+    std::vector<uint64_t> row_hashes(block.rows(), 0); // one accumulator per row, starting at 0
+    for (const auto& key_column : block.get_columns()) {
+        key_column->update_hashes_with_value(row_hashes.data(), nullptr);
+    }
+    return row_hashes;
+}
+
+bool EqualityDeletePredicate::_equal(const Block& data_block, size_t data_row,
+                                     size_t delete_row) const {
+    // Count leading key columns that agree; a full-length run means the rows are equal.
+    size_t idx = 0;
+    while (idx < _delete_block.columns() &&
+           column_value_equal(data_block.get_by_position(idx).column, data_row,
+                              _delete_block.get_by_position(idx).column, delete_row)) {
+        ++idx;
+    }
+    return idx == _delete_block.columns();
+}
+
+std::string EqualityDeletePredicate::debug_string() const {
+    return _expr_name; // always "EqualityDeletePredicate" (assigned in the constructor)
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/expr/equality_delete_predicate.h b/be/src/format_v2/expr/equality_delete_predicate.h
new file mode 100644
index 00000000000000..cad16ca387ccd8
--- /dev/null
+++ b/be/src/format_v2/expr/equality_delete_predicate.h
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "exprs/function_context.h"
+#include "exprs/vexpr.h"
+
+namespace doris {
+class RowDescriptor;
+class RuntimeState;
+class VExprContext;
+} // namespace doris
+
+namespace doris::format {
+
+class EqualityDeletePredicate final : public VExpr {
+    ENABLE_FACTORY_CREATOR(EqualityDeletePredicate);
+
+public:
+    EqualityDeletePredicate(Block delete_block, std::vector<int> field_ids); // both moved in
+    ~EqualityDeletePredicate() override = default;
+
+    Status execute(VExprContext* context, Block* block, int* result_column_id) const override;
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        return Status::InternalError("Not implement EqualityDeletePredicate::execute_column_impl");
+    } // only the block-based execute() above is implemented
+    Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
+    Status open(RuntimeState* state, VExprContext* context,
+                FunctionContext::FunctionStateScope scope) override;
+    void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
+    std::string debug_string() const override;
+    uint64_t get_digest(uint64_t seed) const override { return 0; } // constant; `seed` is unused
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    static std::vector<uint64_t> _build_hashes(const Block& block); // one combined hash per row
+    bool _equal(const Block& data_block, size_t data_row, size_t delete_row) const;
+
+    std::string _expr_name;
+    Block _delete_block; // rows to delete; the constructor DCHECKs one column per field id
+    std::vector<int> _field_ids; // its size is the number of child exprs execute() requires
+    std::vector<uint64_t> _delete_hashes; // row hashes of _delete_block
+    std::multimap<uint64_t, size_t> _delete_hash_map; // row hash -> row index in _delete_block
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/file_reader.cpp b/be/src/format_v2/file_reader.cpp
new file mode 100644
index 00000000000000..31b3f27c69797d
--- /dev/null
+++ b/be/src/format_v2/file_reader.cpp
@@ -0,0 +1,209 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/file_reader.h"
+
+#include <sstream>
+
+#include "format_v2/column_mapper.h"
+#include "io/fs/buffered_reader.h"
+#include "io/fs/tracing_file_reader.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format {
+namespace {
+
+std::unique_ptr<FileStructPredicateTarget> clone_struct_predicate_target(
+        const std::unique_ptr<FileStructPredicateTarget>& target) { // null-safe deep copy
+    return target ? std::make_unique<FileStructPredicateTarget>(*target) : nullptr;
+}
+
+template <typename T, typename Formatter>
+std::string join_debug_strings(const std::vector<T>& values, Formatter formatter) {
+    // Renders "[a, b, c]" where each element is whatever `formatter` returns for it.
+    std::ostringstream out;
+    out << "[";
+    const char* separator = "";
+    for (const auto& value : values) {
+        out << separator << formatter(value);
+        separator = ", ";
+    }
+    out << "]";
+    return out.str();
+}
+
+std::string int_vector_debug_string(const std::vector<int32_t>& values) {
+    // Renders the integers as "[a, b, c]"; an empty vector yields "[]".
+    std::ostringstream out;
+    out << "[";
+    const char* separator = "";
+    for (const int32_t value : values) {
+        out << separator << value;
+        separator = ", ";
+    }
+    out << "]";
+    return out.str();
+}
+
+void append_struct_predicate_path(const FileStructPredicateTarget* target,
+                                  std::vector<int32_t>* path) {
+    DORIS_CHECK(path != nullptr);
+    while (target != nullptr) { // follow the child links, appending one local id per level
+        path->push_back(target->file_local_id);
+        target = target->child.get();
+    }
+}
+
+std::string struct_predicate_target_debug_string(const FileStructPredicateTarget* target) {
+    if (target == nullptr) { // end of the chain (or no target at all)
+        return "null";
+    }
+    const std::string child_text = struct_predicate_target_debug_string(target->child.get());
+    std::ostringstream out;
+    out << "{file_local_id=" << target->file_local_id
+        << ", file_child_name=" << target->file_child_name << ", child=" << child_text << "}";
+    return out.str();
+}
+
+bool struct_predicate_targets_equal(const FileStructPredicateTarget* lhs,
+                                    const FileStructPredicateTarget* rhs) {
+    // Walk both chains in lockstep; only file_local_id is compared, file_child_name is not.
+    for (; lhs != nullptr && rhs != nullptr; lhs = lhs->child.get(), rhs = rhs->child.get()) {
+        if (lhs->file_local_id != rhs->file_local_id) {
+            return false;
+        }
+    }
+    // Equal only when both chains ended together, i.e. they have the same depth.
+    return lhs == nullptr && rhs == nullptr;
+}
+
+} // namespace
+
+FileStructPredicateTarget::FileStructPredicateTarget(const FileStructPredicateTarget& other)
+        : file_local_id(other.file_local_id),
+          file_child_name(other.file_child_name),
+          child(clone_struct_predicate_target(other.child)) {} // deep copy of the child chain
+
+// Deep-copies the child chain of `other`, so the two targets never share nodes.
+FileStructPredicateTarget& FileStructPredicateTarget::operator=(
+        const FileStructPredicateTarget& other) {
+    if (this != &other) {
+        file_local_id = other.file_local_id;
+        file_child_name = other.file_child_name;
+        child = clone_struct_predicate_target(other.child);
+    }
+    return *this;
+}
+
+FileNestedPredicateTarget::FileNestedPredicateTarget(const FileNestedPredicateTarget& other)
+        : file_column_id(other.file_column_id),
+          struct_target(clone_struct_predicate_target(other.struct_target)) {} // deep copy
+
+// Copies the column id and deep-copies the struct target chain of `other`.
+FileNestedPredicateTarget& FileNestedPredicateTarget::operator=(
+        const FileNestedPredicateTarget& other) {
+    if (this != &other) {
+        file_column_id = other.file_column_id;
+        struct_target = clone_struct_predicate_target(other.struct_target);
+    }
+    return *this;
+}
+
+LocalColumnId FileColumnPredicateFilter::effective_file_column_id() const {
+    return target.is_valid() ? target.file_column_id : file_column_id; // a valid target wins
+}
+
+std::vector<int32_t> FileColumnPredicateFilter::effective_file_child_id_path() const {
+    if (target.is_valid()) { // a valid target wins: flatten its struct chain into local ids
+        std::vector<int32_t> flattened;
+        append_struct_predicate_path(target.struct_target.get(), &flattened);
+        return flattened;
+    }
+    return file_child_id_path;
+}
+
+bool FileColumnPredicateFilter::same_target_as(const FileColumnPredicateFilter& other) const {
+    if (!target.is_valid() || !other.target.is_valid()) { // a flat side: compare effective views
+        return effective_file_column_id() == other.effective_file_column_id() &&
+               effective_file_child_id_path() == other.effective_file_child_id_path();
+    }
+    return target.file_column_id == other.target.file_column_id &&
+           struct_predicate_targets_equal(target.struct_target.get(),
+                                          other.target.struct_target.get());
+}
+
+std::string FileColumnPredicateFilter::debug_string() const {
+    std::ostringstream out; // effective_* accessors choose structured target vs flat fields
+    out << "FileColumnPredicateFilter{target={file_column_id=" << effective_file_column_id();
+    out << ", struct_target=" << struct_predicate_target_debug_string(target.struct_target.get());
+    out << "}, file_child_id_path=" << int_vector_debug_string(effective_file_child_id_path());
+    out << ", predicate_count=" << predicates.size() << "}";
+    return out.str();
+}
+
+// One-line dump for diagnostics; conjuncts and delete conjuncts are reported by count only.
+std::string FileScanRequest::debug_string() const {
+    // Both projection lists hold LocalColumnIndex entries, so they share one formatter.
+    const auto format_projection = [](const LocalColumnIndex& projection) {
+        return projection.debug_string();
+    };
+    const auto format_filter = [](const FileColumnPredicateFilter& filter) {
+        return filter.debug_string();
+    };
+
+    std::ostringstream out;
+    out << "FileScanRequest{predicate_columns="
+        << join_debug_strings(predicate_columns, format_projection) << ", non_predicate_columns="
+        << join_debug_strings(non_predicate_columns, format_projection)
+        << ", local_positions={";
+
+    const char* separator = ""; // becomes ", " once the first pair has been written
+    for (const auto& [column_id, block_position] : local_positions) {
+        out << separator << column_id << ":" << block_position;
+        separator = ", ";
+    }
+    out << "}, conjunct_count=" << conjuncts.size()
+        << ", delete_conjunct_count=" << delete_conjuncts.size() << ", column_predicate_filters="
+        << join_debug_strings(column_predicate_filters, format_filter) << "}";
+    return out.str();
+}
+
+Status FileReader::init(RuntimeState* state) {
+    _init_profile();
+    SCOPED_RAW_TIMER(&_reader_statistics.file_reader_create_time); // NOTE(review): scope-wide?
+    ++_reader_statistics.open_file_num; // counted before the open below is attempted
+    io::FileReaderOptions reader_options =
+            FileFactory::get_reader_options(state->query_options(), *_file_description);
+    _file_reader = DORIS_TRY(io::DelegateReader::create_file_reader(
+            _profile, *_system_properties, *_file_description, reader_options,
+            io::DelegateReader::AccessMode::RANDOM, _io_ctx));
+    // IOContext can be present without file_reader_stats in standalone tests or callers that only
+    // need extra IO state. TracingFileReader dereferences the stats pointer on every read, so only
+    // wrap the physical reader when stats collection is actually available.
+    _tracing_file_reader = _io_ctx && _io_ctx->file_reader_stats
+                                   ? std::make_shared<io::TracingFileReader>(
+                                             _file_reader, _io_ctx->file_reader_stats)
+                                   : _file_reader; // no stats: use the physical reader directly
+    _eof = false;
+    return Status::OK();
+}
+
+std::unique_ptr<TableColumnMapper> FileReader::create_column_mapper(
+        TableColumnMapperOptions options) const {
+    return std::make_unique<TableColumnMapper>(std::move(options)); // plain mapper from options
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/file_reader.h b/be/src/format_v2/file_reader.h
new file mode 100644
index 00000000000000..76b8bea60973ac
--- /dev/null
+++ b/be/src/format_v2/file_reader.h
@@ -0,0 +1,400 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/field.h"
+#include "exprs/vexpr_fwd.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/file_factory.h"
+#include "io/fs/file_reader_writer_fwd.h"
+
+namespace doris {
+class Block;
+class ColumnPredicate;
+struct ConditionCacheContext;
+
+namespace io {
+struct IOContext;
+} // namespace io
+} // namespace doris
+
+namespace doris::format {
+
+class TableColumnMapper;
+struct TableColumnMapperOptions;
+
+// Struct-only nested predicate target used by file-layer pruning.
+// This intentionally models only a STRUCT field chain. LIST/MAP/repeated predicates need explicit
+// quantified semantics, so they must not be encoded here.
+struct FileStructPredicateTarget {
+    int32_t file_local_id = -1; // -1 until a constructor argument or caller sets it
+    std::string file_child_name;
+    std::unique_ptr<FileStructPredicateTarget> child; // next link of the field chain, if any
+
+    FileStructPredicateTarget() = default;
+    FileStructPredicateTarget(int32_t local_id, std::string child_name,
+                              std::unique_ptr<FileStructPredicateTarget> nested_child = nullptr)
+            : file_local_id(local_id),
+              file_child_name(std::move(child_name)),
+              child(std::move(nested_child)) {}
+    FileStructPredicateTarget(const FileStructPredicateTarget& other); // not defined in header
+    FileStructPredicateTarget& operator=(const FileStructPredicateTarget& other);
+    FileStructPredicateTarget(FileStructPredicateTarget&& other) noexcept = default;
+    FileStructPredicateTarget& operator=(FileStructPredicateTarget&& other) noexcept = default;
+};
+
+struct FileNestedPredicateTarget {
+    LocalColumnId file_column_id = LocalColumnId::invalid(); // root file column of the predicate
+    // Null means the predicate targets the top-level primitive column itself.
+    std::unique_ptr<FileStructPredicateTarget> struct_target;
+
+    FileNestedPredicateTarget() = default;
+    explicit FileNestedPredicateTarget(LocalColumnId column_id) : file_column_id(column_id) {}
+    FileNestedPredicateTarget(LocalColumnId column_id,
+                              std::unique_ptr<FileStructPredicateTarget> target)
+            : file_column_id(column_id), struct_target(std::move(target)) {}
+    FileNestedPredicateTarget(const FileNestedPredicateTarget& other); // not defined in header
+    FileNestedPredicateTarget& operator=(const FileNestedPredicateTarget& other);
+    FileNestedPredicateTarget(FileNestedPredicateTarget&& other) noexcept = default;
+    FileNestedPredicateTarget& operator=(FileNestedPredicateTarget&& other) noexcept = default;
+
+    bool is_valid() const { return file_column_id.is_valid(); }
+};
+
+// File-local single-column predicates for file-layer pruning, such as min/max, page index,
+// dictionary and bloom filter.
+// Predicates must all belong to target.file_column_id. target.struct_target points to the nested
+// primitive leaf under that root; null means the top-level column itself is the primitive leaf.
+// These predicates are pruning hints only and are not row-level conjuncts.
+struct FileColumnPredicateFilter {
+    FileNestedPredicateTarget target;
+    // Compatibility fields for call sites and tests that still construct pruning filters directly.
+    // New mapper code should fill target; file readers consume target first and only fall back to
+    // these fields while the API migration is in progress.
+    LocalColumnId file_column_id = LocalColumnId::invalid();
+    std::vector<int32_t> file_child_id_path;
+    std::vector<std::shared_ptr<ColumnPredicate>> predicates; // pruning hints, not row filters
+
+    LocalColumnId effective_file_column_id() const;
+    std::vector<int32_t> effective_file_child_id_path() const;
+    bool same_target_as(const FileColumnPredicateFilter& other) const;
+    std::string debug_string() const; // embedded in FileScanRequest::debug_string()
+};
+
+enum class FileFormat {
+    PARQUET,
+    ORC,
+    CSV,
+    JSON,
+    TEXT,
+    JNI, // presumably rows produced by a Java scanner through JNI -- confirm
+    NATIVE,
+    ARROW,
+};
+
+struct FileScanRequest {
+    virtual ~FileScanRequest() = default; // virtual: requests are held through base pointers
+
+    std::string debug_string() const; // dumps positions, conjunct counts and pruning filters
+
+    // Columns that must be read before row-level filtering. They are materialized eagerly because
+    // conjuncts/delete_conjuncts need them to decide the selected rows.
+    std::vector<LocalColumnIndex> predicate_columns;
+    // Columns read after row-level filtering. Predicate columns are also available for output and
+    // should not be duplicated here.
+    std::vector<LocalColumnIndex> non_predicate_columns;
+    // file-local column id -> file-local output block position.
+    std::map<LocalColumnId, LocalIndex> local_positions;
+    // Row-level filters converted to file-local expressions from table-level predicates.
+    VExprContextSPtrs conjuncts;
+    // Delete predicates converted to file-local expressions.
+    VExprContextSPtrs delete_conjuncts;
+    // Single-column predicates used only for file-layer pruning, such as statistics, page index,
+    // dictionary and bloom filter. They must not be used for batch row-level filtering.
+    std::vector<FileColumnPredicateFilter> column_predicate_filters;
+};
+
+// Helper for constructing the scan-column layout in FileScanRequest.
+// FileScanRequest keeps predicate and non-predicate columns separate because columnar readers such
+// as Parquet can read predicate columns first, filter rows, and then lazily read the remaining
+// projected columns. The two lists still share one file-local output block, whose positions are
+// stored in local_positions. This builder centralizes the mechanical rules for that shared layout:
+// - each root file column gets one stable block position;
+// - predicate columns dominate non-predicate columns because they are already returned in the file
+//   block and can be reused for final materialization;
+// - repeated nested projections for the same root are merged instead of duplicated.
+// TableColumnMapper should still own table-to-file semantic resolution. This helper only owns the
+// FileScanRequest layout contract after a file-local projection has been produced.
+class FileScanRequestBuilder {
+public:
+    explicit FileScanRequestBuilder(FileScanRequest* request) : _request(request) {
+        DORIS_CHECK(_request != nullptr);
+    }
+
+    Status add_predicate_column(LocalColumnIndex projection) {
+        return _add_column(std::move(projection), &_request->predicate_columns,
+                           /*is_predicate_column=*/true);
+    }
+
+    Status add_non_predicate_column(LocalColumnIndex projection) {
+        return _add_column(std::move(projection), &_request->non_predicate_columns,
+                           /*is_predicate_column=*/false);
+    }
+
+    Status add_predicate_column(LocalColumnId column_id) {
+        return add_predicate_column(LocalColumnIndex::top_level(column_id));
+    }
+
+    Status add_non_predicate_column(LocalColumnId column_id) {
+        return add_non_predicate_column(LocalColumnIndex::top_level(column_id));
+    }
+
+private:
+    static LocalIndex _next_block_position(const FileScanRequest& request) {
+        // One past the largest position handed out so far; 0 when nothing is registered yet.
+        size_t next_free = 0;
+        for (const auto& [_, used_position] : request.local_positions) {
+            next_free = std::max(next_free, used_position.value() + 1);
+        }
+        return LocalIndex(next_free);
+    }
+
+    static void _sort_projection_children_by_file_id(LocalColumnIndex* projection) {
+        DORIS_CHECK(projection != nullptr);
+        if (projection->project_all_children) {
+            return;
+        }
+        for (auto& nested : projection->children) {
+            _sort_projection_children_by_file_id(&nested);
+        }
+        std::ranges::sort(projection->children,
+                          [](const LocalColumnIndex& a, const LocalColumnIndex& b) {
+                              return a.local_id() < b.local_id();
+                          });
+    }
+
+    Status _add_column(LocalColumnIndex projection, std::vector<LocalColumnIndex>* scan_columns,
+                       bool is_predicate_column) {
+        DORIS_CHECK(scan_columns != nullptr);
+        const auto file_column_id = projection.column_id();
+        DORIS_CHECK(file_column_id != LocalColumnId::invalid());
+        const auto same_root = [&](const LocalColumnIndex& p) {
+            return p.column_id() == file_column_id;
+        };
+        // A root already read as a predicate column is reused for output, so this request is
+        // dropped. NOTE(review): its children are not merged into the predicate projection here;
+        // presumably callers ensure the predicate projection already covers them -- confirm.
+        if (!is_predicate_column && std::ranges::any_of(_request->predicate_columns, same_root)) {
+            return Status::OK();
+        }
+        if (!_request->local_positions.contains(file_column_id)) {
+            _request->local_positions.emplace(file_column_id, _next_block_position(*_request));
+        }
+
+        _sort_projection_children_by_file_id(&projection);
+        auto existing_projection_it = std::ranges::find_if(*scan_columns, same_root);
+        if (existing_projection_it == scan_columns->end()) {
+            scan_columns->push_back(std::move(projection));
+        } else {
+            RETURN_IF_ERROR(merge_local_column_index(&*existing_projection_it, projection));
+            _sort_projection_children_by_file_id(&*existing_projection_it);
+        }
+
+        if (is_predicate_column) {
+            auto it = std::ranges::find_if(_request->non_predicate_columns, same_root);
+            if (it != _request->non_predicate_columns.end()) {
+                _request->non_predicate_columns.erase(it);
+            }
+        }
+        return Status::OK();
+    }
+
+    FileScanRequest* _request = nullptr;
+};
+
+struct FileAggregateRequest {
+    struct Column {
+        // File-local projection for the aggregate column. For nested MIN/MAX, this points to the
+        // single primitive leaf that can be represented by file statistics. For COUNT(col), this
+        // points to the top-level column whose NULL-ness should be counted.
+        LocalColumnIndex projection;
+    };
+
+    TPushAggOp::type agg_type = TPushAggOp::type::NONE; // defaults to NONE until a caller sets it
+    // Empty for COUNT(*)/row-count pushdown. Non-empty for COUNT(col), where the file reader must
+    // return the number of non-NULL rows for the requested column instead of total rows.
+    std::vector<Column> columns;
+};
+
+struct FileAggregateResult {
+    struct Column {
+        // Mirrors FileAggregateRequest::Column::projection so TableReader can put the returned
+        // aggregate value back into the matching projected nested shape.
+        LocalColumnIndex projection;
+        bool has_min = false; // presumably min_value is meaningful only when true -- confirm
+        bool has_max = false; // presumably max_value is meaningful only when true -- confirm
+        Field min_value;
+        Field max_value;
+    };
+
+    int64_t count = 0; // presumably total rows, or non-NULL rows for COUNT(col) -- confirm
+    std::vector<Column> columns;
+};
+
+/**
+ *                                +-----> get_schema() -----------------+
+ * FileReader() -----> init() ----|                                      -----> close()
+ *                                +-----> open() -----> get_block() ----+
+ */
+class FileReader {
+public:
+    struct ReaderStatistics {
+        int32_t filtered_row_groups = 0;
+        int32_t filtered_row_groups_by_min_max = 0;
+        int32_t filtered_row_groups_by_bloom_filter = 0;
+        int32_t read_row_groups = 0;
+        int64_t filtered_group_rows = 0;
+        int64_t filtered_page_rows = 0;
+        int64_t lazy_read_filtered_rows = 0;
+        int64_t read_rows = 0;
+        int64_t filtered_bytes = 0;
+        int64_t column_read_time = 0;
+        int64_t parse_meta_time = 0;
+        int64_t parse_footer_time = 0;
+        int64_t file_footer_read_calls = 0;
+        int64_t file_footer_hit_cache = 0;
+        int64_t file_reader_create_time = 0;
+        int64_t open_file_num = 0;
+        int64_t row_group_filter_time = 0;
+        int64_t page_index_filter_time = 0;
+        int64_t read_page_index_time = 0;
+        int64_t parse_page_index_time = 0;
+        int64_t predicate_filter_time = 0;
+        int64_t dict_filter_rewrite_time = 0;
+        int64_t bloom_filter_read_time = 0;
+    };
+
+    FileReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+               std::unique_ptr<io::FileDescription>& file_description,
+               std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile)
+            : _system_properties(system_properties),
+              _file_description(std::move(file_description)),
+              _io_ctx(std::move(io_ctx)),
+              _profile(profile) {}
+    virtual ~FileReader() = default;
+
+    // Initialize file reader and parse file metadata.
+    virtual Status init(RuntimeState* state);
+
+    // Set the maximum row count for the next physical read batch. Readers that do not batch by
+    // rows may ignore it.
+    virtual void set_batch_size(size_t batch_size) { (void)batch_size; }
+
+    // Get semantic file-local schema from file metadata. The file schema is determined by file
+    // format and file content, and does not contain table/global schema semantics. A file reader may
+    // expose raw file identifiers, such as Parquet field_id, through ColumnDefinition::identifier,
+    // but it must not interpret table-format semantics such as Iceberg name mapping,
+    // default/generated columns, or partition columns. File-format physical wrappers should be
+    // normalized away before exposing this schema; for example, Parquet MAP is exposed as key/value
+    // children rather than key_value/entry.
+    // Doris plans external-table scan types as nullable, including all nested children of complex
+    // types. This protects Doris from illegal or inconsistent values produced by external systems.
+    // Therefore every ColumnDefinition::type returned here must be nullable. Complex types must
+    // also expose nullable child types recursively, even if the physical file marks those fields as
+    // required.
+    // This method can only be called after init() successfully, but does not require open() to be
+    // called.
+    virtual Status get_schema(std::vector<ColumnDefinition>* file_schema) const = 0;
+
+    // Create the mapper that matches this reader's scan-request capabilities. TableReader still
+    // owns table-format semantics such as BY_NAME/BY_FIELD_ID/BY_INDEX, partition values and
+    // default expressions; the FileReader only chooses whether file-local requests support columnar
+    // lazy materialization/pruning or must materialize one flat list of required columns.
+    virtual std::unique_ptr<TableColumnMapper> create_column_mapper(
+            TableColumnMapperOptions options) const;
+
+    // Open the file reader with a file-local scan request and initialize internal state from it.
+    // Table/global schema semantics (schema change, filter localization, default/generated/
+    // partition columns) are handled in the table reader layer. Call only after init() succeeds.
+    virtual Status open(std::shared_ptr<FileScanRequest> request) {
+        _request = std::move(request);
+        return Status::OK();
+    }
+
+    virtual Status get_block(Block* file_block, size_t* rows, bool* eof) {
+        if (rows != nullptr) {
+            *rows = 0;
+        }
+        if (eof != nullptr) {
+            *eof = true;
+        }
+        _eof = true;
+        return Status::OK();
+    }
+
+    virtual Status get_aggregate_result(const FileAggregateRequest& request,
+                                        FileAggregateResult* result) {
+        return Status::NotSupported("FileReader does not support aggregate pushdown");
+    }
+
+    // Condition cache is managed by TableReader and consumed by physical file readers.
+    // On cache HIT, readers may skip granules whose cached bit is false before doing column IO.
+    // On cache MISS, readers mark a granule true when row-level predicates keep at least one row
+    // in that granule. Readers that cannot map batch rows to stable file-global row ids should
+    // keep the default no-op implementation.
+    virtual void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) {}
+
+    // Total rows covered by this physical reader. TableReader uses it to pre-size the miss bitmap.
+    // Readers should return 0 if the metadata is unavailable or the row coordinate is unstable.
+    virtual int64_t get_total_rows() const { return 0; }
+
+    virtual Status close() {
+        _file_reader.reset();
+        _tracing_file_reader.reset();
+        _io_ctx.reset();
+        _eof = true;
+        return Status::OK();
+    }
+
+protected:
+    virtual void _init_profile() {}
+
+    io::FileReaderSPtr _file_reader;
+    // Set by init(): an io::TracingFileReader around _file_reader when _io_ctx carries
+    // file_reader_stats, otherwise the same object as _file_reader.
+    io::FileReaderSPtr _tracing_file_reader = nullptr;
+    std::shared_ptr<FileScanRequest> _request;
+    bool _eof = true;
+    ReaderStatistics _reader_statistics;
+    std::shared_ptr<io::FileSystemProperties> _system_properties;
+    std::unique_ptr<io::FileDescription> _file_description;
+    std::shared_ptr<io::IOContext> _io_ctx;
+    RuntimeProfile* _profile = nullptr;
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/jni/hudi_jni_reader.cpp b/be/src/format_v2/jni/hudi_jni_reader.cpp
new file mode 100644
index 00000000000000..3247e3c683c2de
--- /dev/null
+++ b/be/src/format_v2/jni/hudi_jni_reader.cpp
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/hudi_jni_reader.h"
+
+#include <string_view>
+
+#include "core/block/block.h"
+#include "exprs/vexpr_context.h"
+#include "util/string_util.h"
+#include "util/uid_util.h"
+
+namespace doris::format::hudi {
+namespace {
+
+constexpr std::string_view HOODIE_CONF_PREFIX = "hoodie."; // scan properties with it pass through
+constexpr std::string_view HADOOP_CONF_PREFIX = "hadoop_conf."; // prepended to all other keys
+
+} // namespace
+
+Status HudiJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    // Rejects scan ranges that lack the Hudi parameters build_scanner_params() reads.
+    // `missing` builds the error for a field whose absence suggests an FE/BE protocol mismatch.
+    const auto missing = [](std::string_view what) {
+        return Status::InternalError(
+                "missing {} for hudi jni reader, possibly caused by FE/BE protocol mismatch", what);
+    };
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError("missing table_format_params for hudi jni reader");
+    }
+    if (!range.table_format_params.__isset.hudi_params) {
+        return Status::InternalError("missing hudi_params for hudi jni reader");
+    }
+    const auto& hudi_params = range.table_format_params.hudi_params;
+    if (!hudi_params.__isset.base_path || hudi_params.base_path.empty()) {
+        return missing("base_path");
+    }
+    if (!hudi_params.__isset.data_file_path || hudi_params.data_file_path.empty()) {
+        return missing("data_file_path");
+    }
+    if (!hudi_params.__isset.data_file_length) {
+        return missing("data_file_length");
+    }
+    if (!hudi_params.__isset.column_names) {
+        return missing("column_names");
+    }
+    if (!hudi_params.__isset.column_types) {
+        return missing("column_types");
+    }
+    // Names and types are joined into parallel lists for the Java scanner, so they must pair up.
+    // Both sizes come from the scan range, so a mismatch is reported instead of a fatal check.
+    if (hudi_params.column_names.size() != hudi_params.column_types.size()) {
+        return Status::InternalError(
+                "hudi column_names size {} does not match column_types size {}",
+                hudi_params.column_names.size(), hudi_params.column_types.size());
+    }
+    if (_scan_params == nullptr) {
+        return missing("scan params");
+    }
+    return Status::OK();
+}
+
+std::string HudiJniReader::connector_class() const {
+    return "org/apache/doris/hudi/HadoopHudiJniScanner"; // presumably the Java scanner class
+}
+
+Status HudiJniReader::build_scanner_params(std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    DORIS_CHECK(_scan_params != nullptr);
+    auto& out = *params;
+    out.clear();
+
+    // Per-range values copied from the current range's hudi_params.
+    const auto& hudi_params = _current_range.table_format_params.hudi_params;
+    out["base_path"] = hudi_params.base_path;
+    out["data_file_path"] = hudi_params.data_file_path;
+    out["data_file_length"] = std::to_string(hudi_params.data_file_length);
+    out["delta_file_paths"] = join(hudi_params.delta_logs, ",");
+    out["hudi_column_names"] = join(hudi_params.column_names, ",");
+    out["hudi_column_types"] = join(hudi_params.column_types, "#");
+    out["instant_time"] = hudi_params.instant_time;
+    out["serde"] = hudi_params.serde;
+    out["input_format"] = hudi_params.input_format;
+    if (_runtime_state != nullptr) {
+        out["query_id"] = print_id(_runtime_state->query_id());
+    }
+
+    // "hoodie."-prefixed properties keep their key; all others get the "hadoop_conf." prefix.
+    for (const auto& [key, value] : _scan_params->properties) {
+        const bool is_hoodie_conf = key.starts_with(HOODIE_CONF_PREFIX);
+        out[is_hoodie_conf ? key : std::string(HADOOP_CONF_PREFIX) + key] = value;
+    }
+    return Status::OK();
+}
+
+Status HudiJniReader::build_jni_columns(
+        std::vector<format::JniTableReader::JniColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    columns->reserve(_projected_columns.size());
+    for (size_t i = 0; i < _projected_columns.size(); ++i) {
+        const auto& table_column = _projected_columns[i];
+        // Partition keys with a known value are filled in by finalize_jni_block(), not by JNI.
+        if (!table_column.is_partition_key ||
+            find_partition_value(table_column, _partition_values) == nullptr) {
+            columns->push_back({
+                    .java_name = table_column.name,
+                    .output_index = i,
+                    .output_type = table_column.type,
+                    .transfer_type = table_column.type,
+                    .replace_type = "not_replace",
+            });
+        }
+    }
+    return Status::OK();
+}
+
+Status HudiJniReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) {
+    DORIS_CHECK(jni_block != nullptr);
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    const auto original_rows = *rows; // row count on entry, before _conjuncts are applied
+
+    const auto& columns = jni_columns();
+    DORIS_CHECK(columns.size() == jni_block->columns());
+    for (size_t i = 0; i < columns.size(); ++i) { // place JNI columns at their output positions
+        const auto& column = columns[i];
+        DORIS_CHECK(column.output_index < output_block->columns());
+        output_block->get_by_position(column.output_index).type = column.output_type;
+        output_block->replace_by_position(column.output_index,
+                                          jni_block->get_by_position(i).column);
+    }
+
+    for (size_t i = 0; i < _projected_columns.size(); ++i) {
+        const auto& table_column = _projected_columns[i];
+        const auto* partition_value = find_partition_value(table_column, _partition_values);
+        if (!table_column.is_partition_key || partition_value == nullptr) {
+            continue; // only partition keys with a known value are synthesized below
+        }
+        output_block->get_by_position(i).type = table_column.type;
+        output_block->replace_by_position(
+                i, table_column.type->create_column_const(original_rows, *partition_value));
+    }
+    DORIS_CHECK(output_block->rows() == original_rows);
+    if (!_conjuncts.empty()) {
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_conjuncts, output_block, output_block->columns()));
+    }
+    *rows = output_block->rows(); // row count after the optional conjunct filtering
+    return Status::OK();
+}
+
+} // namespace doris::format::hudi
diff --git a/be/src/format_v2/jni/hudi_jni_reader.h b/be/src/format_v2/jni/hudi_jni_reader.h
new file mode 100644
index 00000000000000..4beb6f2d1728b6
--- /dev/null
+++ b/be/src/format_v2/jni/hudi_jni_reader.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::hudi {
+
+class HudiJniReader final : public format::JniTableReader { // uses HadoopHudiJniScanner
+public:
+    ~HudiJniReader() override = default;
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+    Status build_jni_columns(
+            std::vector<format::JniTableReader::JniColumn>* columns) const override;
+    Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override;
+};
+
+} // namespace doris::format::hudi
diff --git a/be/src/format_v2/jni/iceberg_sys_table_reader.cpp b/be/src/format_v2/jni/iceberg_sys_table_reader.cpp
new file mode 100644
index 00000000000000..b41d505f886d31
--- /dev/null
+++ b/be/src/format_v2/jni/iceberg_sys_table_reader.cpp
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/iceberg_sys_table_reader.h"
+
+#include <string_view>
+
+#include "format/jni/jni_data_bridge.h"
+#include "util/string_util.h"
+
+namespace doris::format::iceberg {
+namespace {
+
+constexpr std::string_view HADOOP_OPTION_PREFIX = "hadoop."; // prefix for scan property keys
+
+} // namespace
+
+Status IcebergSysTableJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError(
+                "missing table_format_params for iceberg sys table jni reader");
+    }
+    if (!range.table_format_params.__isset.iceberg_params) {
+        return Status::InternalError("missing iceberg_params for iceberg sys table jni reader");
+    }
+    const auto& iceberg_params = range.table_format_params.iceberg_params;
+    if (!iceberg_params.__isset.serialized_split || iceberg_params.serialized_split.empty()) {
+        return Status::InternalError(
+                "missing serialized_split for iceberg sys table jni reader, "
+                "possibly caused by FE/BE protocol mismatch");
+    }
+    return Status::OK();
+}
+
+std::string IcebergSysTableJniReader::connector_class() const {
+    return "org/apache/doris/iceberg/IcebergSysTableJniScanner"; // presumably the Java scanner
+}
+
+Status IcebergSysTableJniReader::build_scanner_params(
+        std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    params->clear();
+    params->emplace("serialized_split",
+                    _current_range.table_format_params.iceberg_params.serialized_split);
+
+    // One JNI type string per projected column, joined with '#'.
+    std::vector<std::string> required_types;
+    required_types.reserve(_projected_columns.size());
+    for (const auto& column : _projected_columns) {
+        required_types.emplace_back(JniDataBridge::get_jni_type_with_different_string(column.type));
+    }
+    (*params)["required_types"] = join(required_types, "#");
+
+    if (_scan_params != nullptr && _scan_params->__isset.properties) {
+        for (const auto& [key, value] : _scan_params->properties) {
+            (*params)[std::string(HADOOP_OPTION_PREFIX) + key] = value;
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::iceberg
diff --git a/be/src/format_v2/jni/iceberg_sys_table_reader.h b/be/src/format_v2/jni/iceberg_sys_table_reader.h
new file mode 100644
index 00000000000000..be254c39f3ffb5
--- /dev/null
+++ b/be/src/format_v2/jni/iceberg_sys_table_reader.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::iceberg {
+
+class IcebergSysTableJniReader final : public format::JniTableReader { // Iceberg system tables
+public:
+    ~IcebergSysTableJniReader() override = default;
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+};
+
+} // namespace doris::format::iceberg
diff --git a/be/src/format_v2/jni/jdbc_reader.cpp b/be/src/format_v2/jni/jdbc_reader.cpp
new file mode 100644
index 00000000000000..e0391f3a13a8f0
--- /dev/null
+++ b/be/src/format_v2/jni/jdbc_reader.cpp
@@ -0,0 +1,187 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/jdbc_reader.h"
+
+#include <memory>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/block/columns_with_type_and_name.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "exprs/function/simple_function_factory.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/table_reader.h"
+#include "util/jdbc_utils.h"
+
+namespace doris::format::jdbc {
+
+std::string JdbcJniReader::connector_class() const {
+    return "org/apache/doris/jdbc/JdbcJniScanner"; // '/'-separated Java class name
+}
+
+Status JdbcJniReader::prepare_split(const format::SplitReadOptions& options) {
+    // Capture split-local JDBC params first; the base call opens the Java scanner with them.
+    const auto& range = options.current_range;
+    const auto& table_format = range.table_format_params;
+    _jdbc_params.clear();
+    if (range.__isset.table_format_params && table_format.table_format_type == "jdbc") {
+        _jdbc_params.insert(table_format.jdbc_params.begin(), table_format.jdbc_params.end());
+    }
+    return format::JniTableReader::prepare_split(options);
+}
+
+// Hands the split's JDBC params to the Java scanner, which uses them to open the connection.
+Status JdbcJniReader::build_scanner_params(std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    *params = _jdbc_params;
+    if (auto url = params->find("jdbc_driver_url"); url != params->end()) {
+        std::string resolved; // best effort: a failed resolution keeps the original URL
+        if (JdbcUtils::resolve_driver_url(url->second, &resolved).ok()) {
+            url->second = resolved;
+        }
+    }
+    return Status::OK();
+}
+
+Status JdbcJniReader::build_jni_columns(
+        std::vector<format::JniTableReader::JniColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    // One JNI column per projected column, in projection order.
+    columns->clear();
+    columns->reserve(_projected_columns.size());
+    size_t position = 0;
+    for (const auto& projected : _projected_columns) {
+        const auto primitive = remove_nullable(projected.type)->get_primitive_type();
+        auto& jni_column = columns->emplace_back();
+        jni_column.java_name = projected.name;
+        jni_column.output_index = position++;
+        jni_column.output_type = projected.type;
+        jni_column.transfer_type = _transfer_type_for(projected.type);
+        jni_column.replace_type = _replace_type_for(primitive);
+    }
+    return Status::OK();
+}
+
+Status JdbcJniReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) {
+    // Moves JNI columns into output_block (casting special types back), then applies conjuncts.
+    DORIS_CHECK(jni_block != nullptr);
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    const auto& columns = jni_columns();
+    DORIS_CHECK(columns.size() == jni_block->columns());
+    const auto rows_before_filter = *rows;
+    for (size_t idx = 0; idx < columns.size(); ++idx) {
+        const auto& jni_column = columns[idx];
+        DORIS_CHECK(jni_column.output_type != nullptr);
+        DORIS_CHECK(jni_column.output_index < output_block->columns());
+        if (_is_special_type(remove_nullable(jni_column.output_type)->get_primitive_type())) {
+            RETURN_IF_ERROR(_cast_string_to_special_type(jni_column, jni_block, idx, output_block,
+                                                         rows_before_filter));
+        } else {
+            output_block->get_by_position(jni_column.output_index).type = jni_column.output_type;
+            output_block->replace_by_position(jni_column.output_index,
+                                              jni_block->get_by_position(idx).column);
+        }
+    }
+    DORIS_CHECK(output_block->rows() == rows_before_filter);
+    if (!_conjuncts.empty()) {
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_conjuncts, output_block, output_block->columns()));
+    }
+    *rows = output_block->rows();
+    return Status::OK();
+}
+
+std::string JdbcJniReader::_replace_type_for(PrimitiveType type) const {
+    static constexpr std::pair<PrimitiveType, const char*> kReplaceTags[] = {
+            {PrimitiveType::TYPE_BITMAP, "bitmap"},
+            {PrimitiveType::TYPE_HLL, "hll"},
+            {PrimitiveType::TYPE_QUANTILE_STATE, "quantile_state"},
+            {PrimitiveType::TYPE_JSONB, "jsonb"},
+    };
+    for (const auto& [special_type, tag] : kReplaceTags) {
+        if (type == special_type) {
+            return tag;
+        }
+    }
+    return "not_replace"; // tag for types that need no replacement
+}
+
+bool JdbcJniReader::_is_special_type(PrimitiveType type) const { // types transferred as strings
+    return type == PrimitiveType::TYPE_BITMAP || type == PrimitiveType::TYPE_HLL ||
+           type == PrimitiveType::TYPE_QUANTILE_STATE || type == PrimitiveType::TYPE_JSONB;
+}
+
+DataTypePtr JdbcJniReader::_transfer_type_for(const DataTypePtr& output_type) const {
+    // Special types cross the JNI boundary as strings with the same nullability; every other
+    // type is transferred as-is.
+    DORIS_CHECK(output_type != nullptr);
+    const auto primitive = remove_nullable(output_type)->get_primitive_type();
+    if (_is_special_type(primitive)) {
+        const DataTypePtr as_string = std::make_shared<DataTypeString>();
+        return output_type->is_nullable() ? make_nullable(as_string) : as_string;
+    }
+    return output_type;
+}
+
+Status JdbcJniReader::_cast_string_to_special_type(const format::JniTableReader::JniColumn& column,
+                                                   Block* jni_block, size_t jni_column_index,
+                                                   Block* output_block, size_t rows) {
+    // Casts the string-transferred column at jni_column_index of jni_block to
+    // column.output_type and stores it at column.output_index of output_block.
+    DORIS_CHECK(column.output_type != nullptr);
+    DORIS_CHECK(column.transfer_type != nullptr);
+    const auto target_type = column.output_type;
+    const auto target_type_name = target_type->get_name();
+    ColumnPtr input_column = jni_block->get_by_position(jni_column_index).column;
+    ColumnPtr cast_param = target_type->create_column_const_with_default_value(1);
+    // Argument 0 is the data to cast; argument 1 is a constant column of the target type.
+    ColumnsWithTypeAndName argument_template;
+    argument_template.reserve(2);
+    argument_template.emplace_back(std::move(input_column), column.transfer_type,
+                                   "java.sql.String");
+    argument_template.emplace_back(std::move(cast_param), target_type, target_type_name);
+    // The CAST result is always requested as nullable, whatever the target's nullability.
+    FunctionBasePtr cast_function = SimpleFunctionFactory::instance().get_function(
+            "CAST", argument_template, make_nullable(target_type));
+    if (cast_function == nullptr) {
+        return Status::InternalError("Failed to find CAST function for type {}", target_type_name);
+    }
+    Block cast_block(argument_template);
+    const auto result_idx = cast_set<uint32_t>(cast_block.columns());
+    cast_block.insert({nullptr, make_nullable(target_type), "cast_result"});
+    RETURN_IF_ERROR(
+            cast_function->execute(nullptr, cast_block, {0}, result_idx, cast_set<int>(rows)));
+    // A non-nullable target takes only the nested column of the nullable result.
+    auto result_column = cast_block.get_by_position(result_idx).column;
+    output_block->get_by_position(column.output_index).type = target_type;
+    if (target_type->is_nullable()) {
+        output_block->replace_by_position(column.output_index, result_column);
+    } else {
+        const auto* nullable_column = assert_cast<const ColumnNullable*>(result_column.get());
+        output_block->replace_by_position(column.output_index,
+                                          nullable_column->get_nested_column_ptr());
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::jdbc
diff --git a/be/src/format_v2/jni/jdbc_reader.h b/be/src/format_v2/jni/jdbc_reader.h
new file mode 100644
index 00000000000000..91a5878cb4622f
--- /dev/null
+++ b/be/src/format_v2/jni/jdbc_reader.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "core/types.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::jdbc {
+
+class JdbcJniReader final : public format::JniTableReader { // scans through JdbcJniScanner
+public:
+    ~JdbcJniReader() override = default;
+    // Captures the split's jdbc_params, then delegates to JniTableReader::prepare_split().
+    Status prepare_split(const format::SplitReadOptions& options) override;
+
+protected:
+    std::string connector_class() const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+    Status build_jni_columns(
+            std::vector<format::JniTableReader::JniColumn>* columns) const override;
+    Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override;
+
+private:
+    bool _is_special_type(PrimitiveType type) const; // BITMAP, HLL, QUANTILE_STATE or JSONB
+    std::string _replace_type_for(PrimitiveType type) const;
+    DataTypePtr _transfer_type_for(const DataTypePtr& output_type) const;
+    Status _cast_string_to_special_type(const format::JniTableReader::JniColumn& column,
+                                        Block* jni_block, size_t jni_column_index,
+                                        Block* output_block, size_t rows);
+    // JDBC params of the current split; build_scanner_params() copies them for the Java side.
+    std::map<std::string, std::string> _jdbc_params;
+};
+
+} // namespace doris::format::jdbc
diff --git a/be/src/format_v2/jni/jni_table_reader.cpp b/be/src/format_v2/jni/jni_table_reader.cpp
new file mode 100644
index 00000000000000..d43a22e632b26b
--- /dev/null
+++ b/be/src/format_v2/jni/jni_table_reader.cpp
@@ -0,0 +1,386 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/jni_table_reader.h"
+
+#include <utility>
+
+#include "common/cast_set.h"
+#include "core/block/block.h"
+#include "exprs/vexpr_context.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/string_util.h"
+
+namespace doris::format {
+
+Status JniTableReader::init(TableReadOptions&& options) {
+    RETURN_IF_ERROR(TableReader::init(std::move(options)));
+    _init_profile();
+    // File readers get their file-local filters prepared in TableReader::open_reader(). JNI
+    // readers never take that path and evaluate the table-level conjuncts directly on the JNI
+    // block, so the conjuncts are prepared and opened here.
+    RowDescriptor row_desc;
+    for (const auto& conjunct : _conjuncts) {
+        RETURN_IF_ERROR(conjunct->prepare(_runtime_state, row_desc));
+        RETURN_IF_ERROR(conjunct->open(_runtime_state));
+    }
+    return Status::OK();
+}
+
+Status JniTableReader::prepare_split(const SplitReadOptions& options) {
+    // Validates and records the split, then eagerly opens the Java scanner for it unless the
+    // table-level count path is active (get_block() serves that path without the scanner).
+    _current_range = options.current_range;
+    RETURN_IF_ERROR(validate_scan_range(options.current_range));
+    RETURN_IF_ERROR(TableReader::prepare_split(options));
+    DORIS_CHECK(!_closed);
+    DORIS_CHECK(!_scanner_opened);
+    // Subclasses fill their split-specific params before delegating here, so opening now
+    // (instead of lazily in the first get_block()) already sees them.
+    const bool count_only = _is_table_level_count_active();
+    return count_only ? Status::OK() : _open_jni_scanner();
+}
+
+Status JniTableReader::get_block(Block* output_block, bool* eos) {
+    // Produces the next non-empty block of the current split, or sets *eos once the Java
+    // scanner is exhausted. Batches whose rows are all filtered out are skipped.
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(eos != nullptr);
+    DORIS_CHECK(output_block->columns() == _projected_columns.size());
+    output_block->clear_column_data(_projected_columns.size());
+    if (_is_table_level_count_active()) {
+        return _read_table_level_count(output_block, eos);
+    }
+    DORIS_CHECK(_scanner_opened);
+    if (_eof) {
+        *eos = true;
+        return Status::OK();
+    }
+    for (;;) {
+        size_t batch_rows = 0;
+        bool exhausted = false;
+        // Pull the next Java batch into _jni_block_template.
+        RETURN_IF_ERROR(_get_next_jni_block(&batch_rows, &exhausted));
+        if (exhausted) {
+            break;
+        }
+        RETURN_IF_ERROR(finalize_jni_block(&_jni_block_template, output_block, &batch_rows));
+        if (batch_rows != 0) {
+            *eos = false;
+            return Status::OK();
+        }
+        output_block->clear_column_data(_projected_columns.size());
+    }
+    // NOTE(review): _close_jni_scanner() resets split state, so _eof does not stay set.
+    _eof = true;
+    RETURN_IF_ERROR(_close_jni_scanner());
+    *eos = true;
+    return Status::OK();
+}
+
+Status JniTableReader::_get_next_jni_block(size_t* rows, bool* eof) {
+    // Fetches one batch from the Java scanner into _jni_block_template. *eof is set when Java
+    // returns no batch meta or a batch of zero rows; otherwise *rows is the batch row count.
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    *rows = 0;
+    _jni_block_template.clear_column_data(_jni_columns.size());
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+    long meta_address = 0;
+    {
+        SCOPED_RAW_TIMER(&_java_scan_watcher);
+        // getNextBatchMeta() yields the address of the batch meta; 0 is treated as end of data.
+        RETURN_IF_ERROR(_jni_scanner_obj.call_long_method(env, _jni_scanner_get_next_batch)
+                                .call(&meta_address));
+    }
+    RETURN_ERROR_IF_EXC(env);
+    if (meta_address == 0) {
+        *eof = true;
+        return Status::OK();
+    }
+
+    JniDataBridge::TableMetaAddress table_meta(meta_address);
+    const auto num_rows = table_meta.next_meta_as_long();
+    if (num_rows == 0) {
+        *eof = true;
+        return Status::OK();
+    }
+    *rows = cast_set<size_t>(num_rows);
+    // Copy the Java batch described by table_meta into the C++ template block.
+    RETURN_IF_ERROR(_fill_jni_block(table_meta, *rows));
+    // releaseTable() lets the Java side free the memory of the batch that was just copied.
+    RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call());
+    RETURN_ERROR_IF_EXC(env);
+    *eof = false;
+    return Status::OK();
+}
+
+// Copies every column of the current Java batch into _jni_block_template, in column order.
+Status JniTableReader::_fill_jni_block(JniDataBridge::TableMetaAddress& table_meta,
+                                       size_t num_rows) {
+    SCOPED_RAW_TIMER(&_fill_block_watcher);
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+    for (size_t i = 0; i < _jni_columns.size(); ++i) {
+        const auto& read_column = _jni_columns[i];
+        auto& column_with_type_and_name = _jni_block_template.get_by_position(i);
+        auto& column_ptr = column_with_type_and_name.column;
+        RETURN_IF_ERROR(JniDataBridge::fill_column(table_meta, column_ptr,
+                                                   read_column.transfer_type, num_rows));
+        // releaseColumn(i) lets the Java side free column i right after it has been copied.
+        RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_column)
+                                .with_arg(cast_set<int>(i))
+                                .call());
+        RETURN_ERROR_IF_EXC(env);
+    }
+    return Status::OK();
+}
+
+Status JniTableReader::finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) {
+    // Default finalization: JNI columns go to their output slots as-is, then conjuncts filter.
+    DORIS_CHECK(jni_block != nullptr);
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(jni_block->columns() == _jni_columns.size());
+    const auto rows_before_filter = *rows;
+    for (size_t src = 0; src < _jni_columns.size(); ++src) {
+        const auto& jni_column = _jni_columns[src];
+        const size_t dst = jni_column.output_index;
+        DORIS_CHECK(dst < output_block->columns());
+        output_block->get_by_position(dst).type = jni_column.output_type;
+        output_block->replace_by_position(dst, jni_block->get_by_position(src).column);
+    }
+    DORIS_CHECK(output_block->rows() == rows_before_filter);
+    if (!_conjuncts.empty()) {
+        const auto column_count = output_block->columns();
+        RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, column_count));
+    }
+    *rows = output_block->rows(); // rows that survived the conjuncts
+    return Status::OK();
+}
+
+Status JniTableReader::build_jni_columns(std::vector<JniColumn>* columns) const {
+    // Default mapping: every projected column is transferred with its own type, unreplaced.
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    columns->reserve(_projected_columns.size());
+    size_t position = 0;
+    for (const auto& projected : _projected_columns) {
+        auto& jni_column = columns->emplace_back();
+        jni_column.java_name = projected.name;
+        jni_column.output_index = position++;
+        jni_column.output_type = projected.type;
+        jni_column.transfer_type = projected.type;
+        jni_column.replace_type = "not_replace";
+    }
+    return Status::OK();
+}
+
+int64_t JniTableReader::self_split_weight() const { // -1 means the split carries no weight
+    return !_current_range.__isset.self_split_weight ? -1 : _current_range.self_split_weight;
+}
+
+Status JniTableReader::close() {
+    if (std::exchange(_closed, true)) { // idempotent: only the first call does the work
+        return Status::OK();
+    }
+    const Status jni_status = _close_jni_scanner();
+    const Status base_status = TableReader::close(); // must run even if JNI teardown failed
+    return jni_status.ok() ? base_status : jni_status; // the JNI error takes precedence
+}
+
+Status JniTableReader::_close_jni_scanner() {
+    // Ends the current split: publishes its timers, closes the Java scanner, resets state.
+    if (!_scanner_opened) {
+        // Nothing was opened: only drop a scanner object left behind by a failed open, if any.
+        JNIEnv* env = nullptr;
+        if (!_jni_scanner_obj.uninitialized()) {
+            RETURN_IF_ERROR(Jni::Env::Get(&env));
+        }
+        _reset_split_state(env);
+        return Status::OK();
+    }
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+    if (_scanner_profile != nullptr) {
+        COUNTER_UPDATE(_open_scanner_time, _jni_scanner_open_watcher);
+        COUNTER_UPDATE(_fill_block_time, _fill_block_watcher);
+    }
+    // NOTE(review): an error returned from here on skips close() and the state reset below.
+    RETURN_ERROR_IF_EXC(env);
+    jlong append_data_time = 0;
+    RETURN_IF_ERROR(_jni_scanner_obj.call_long_method(env, _jni_scanner_get_append_data_time)
+                            .call(&append_data_time));
+    jlong create_vector_table_time = 0;
+    RETURN_IF_ERROR(
+            _jni_scanner_obj.call_long_method(env, _jni_scanner_get_create_vector_table_time)
+                    .call(&create_vector_table_time));
+    if (_scanner_profile != nullptr) {
+        COUNTER_UPDATE(_java_append_data_time, append_data_time);
+        COUNTER_UPDATE(_java_create_vector_table_time, create_vector_table_time);
+        COUNTER_UPDATE(_java_scan_time,
+                       _java_scan_watcher - append_data_time - create_vector_table_time);
+        _max_time_split_weight_counter->conditional_update(
+                _jni_scanner_open_watcher + _fill_block_watcher + _java_scan_watcher,
+                self_split_weight());
+    }
+    // _fill_jni_block may fail before the current Java table is released. The Java-side
+    // JniScanner::releaseTable() is idempotent, so it is always called when closing a split.
+    RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_release_table).call());
+    RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_close).call());
+    _reset_split_state(env);
+    return Status::OK();
+}
+
+void JniTableReader::_reset_split_state(JNIEnv* env) {
+    if (!_jni_scanner_obj.uninitialized()) {
+        DORIS_CHECK(env != nullptr); // env may only be null when there is no scanner object
+        _jni_scanner_obj.reset(env);
+    }
+    _scanner_opened = false; // lets the next prepare_split() open a scanner again
+    _eof = false;
+    _scanner_params.clear();
+    _jni_columns.clear();
+    _jni_block_template.clear();
+    _jni_scanner_open_watcher = 0; // timers restart from zero for the next split
+    _java_scan_watcher = 0;
+    _fill_block_watcher = 0;
+}
+
+Status JniTableReader::_open_jni_scanner() {
+    // Builds the scanner params and column schema for this split, then creates the Java
+    // scanner object and calls its open(). _scanner_opened is set only after open() succeeds.
+    RETURN_IF_ERROR(build_scanner_params(&_scanner_params));
+    RETURN_IF_ERROR(build_jni_columns(&_jni_columns));
+    // Derives the Java schema params and the JNI block template from _jni_columns.
+    _prepare_jni_scanner_schema();
+
+    if (_runtime_state != nullptr) {
+        if (_batch_size == 0) {
+            _batch_size = _runtime_state->batch_size();
+        }
+        _scanner_params["time_zone"] = _runtime_state->timezone();
+    }
+
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+    // Scoped timer accumulating into _jni_scanner_open_watcher.
+    SCOPED_RAW_TIMER(&_jni_scanner_open_watcher);
+    RETURN_IF_ERROR(_register_jni_class_functions_once(env));
+    const int batch_size = cast_set<int>(_batch_size);
+    RETURN_IF_ERROR(_create_jni_scanner_object(env, batch_size));
+    RETURN_IF_ERROR(_jni_scanner_obj.call_void_method(env, _jni_scanner_open).call());
+    RETURN_ERROR_IF_EXC(env);
+    _scanner_opened = true;
+    return Status::OK();
+}
+
+void JniTableReader::_prepare_jni_scanner_schema() {
+    // Publishes _jni_columns to the Java side as "required_fields" / "columns_types" (plus
+    // "replace_string" when any column asks for it) and rebuilds _jni_block_template to match.
+    std::vector<std::string> java_names;
+    std::vector<std::string> java_types;
+    std::vector<std::string> replace_tags;
+    java_names.reserve(_jni_columns.size());
+    java_types.reserve(_jni_columns.size());
+    replace_tags.reserve(_jni_columns.size());
+    _jni_block_template.clear();
+    _jni_block_template.reserve(_jni_columns.size());
+    bool needs_replace = false;
+    for (const auto& col : _jni_columns) {
+        const auto& transfer_type = col.transfer_type;
+        DORIS_CHECK(transfer_type != nullptr);
+        java_names.push_back(col.java_name);
+        java_types.push_back(JniDataBridge::get_jni_type_with_different_string(transfer_type));
+        replace_tags.push_back(col.replace_type);
+        needs_replace = needs_replace || col.replace_type != "not_replace";
+        _jni_block_template.insert({transfer_type->create_column(), transfer_type, col.java_name});
+    }
+    _scanner_params["required_fields"] = join(java_names, ",");
+    _scanner_params["columns_types"] = join(java_types, "#");
+    if (needs_replace) {
+        _scanner_params["replace_string"] = join(replace_tags, ",");
+    }
+}
+
+Status JniTableReader::_register_jni_class_functions_once(JNIEnv* env) {
+    // Resolves the connector's Java class and caches the scanner method ids used later.
+    if (!_jni_scanner_cls.uninitialized()) {
+        return Status::OK(); // the class was already resolved by an earlier call
+    }
+    RETURN_IF_ERROR(
+            Jni::Util::get_jni_scanner_class(env, connector_class().c_str(), &_jni_scanner_cls));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "<init>", "(ILjava/util/Map;)V",
+                                                &_jni_scanner_constructor));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "open", "()V", &_jni_scanner_open));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getNextBatchMeta", "()J",
+                                                &_jni_scanner_get_next_batch));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getAppendDataTime", "()J",
+                                                &_jni_scanner_get_append_data_time));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getCreateVectorTableTime", "()J",
+                                                &_jni_scanner_get_create_vector_table_time));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "close", "()V", &_jni_scanner_close));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "releaseColumn", "(I)V",
+                                                &_jni_scanner_release_column));
+    RETURN_IF_ERROR(
+            _jni_scanner_cls.get_method(env, "releaseTable", "()V", &_jni_scanner_release_table));
+    RETURN_IF_ERROR(_jni_scanner_cls.get_method(env, "getStatistics", "()Ljava/util/Map;",
+                                                &_jni_scanner_get_statistics));
+    RETURN_IF_ERROR(
+            _jni_scanner_cls.get_method(env, "setBatchSize", "(I)V", &_jni_scanner_set_batch_size));
+    return Status::OK();
+}
+
+Status JniTableReader::_create_jni_scanner_object(JNIEnv* env, int batch_size) {
+    DORIS_CHECK(!_jni_scanner_cls.uninitialized());
+    DORIS_CHECK(!_jni_scanner_constructor.uninitialized());
+    DORIS_CHECK(_jni_scanner_obj.uninitialized()); // never replace a live scanner object
+    Jni::LocalObject hashmap_object; // Java map built from _scanner_params
+    RETURN_IF_ERROR(Jni::Util::convert_to_java_map(env, _scanner_params, &hashmap_object));
+    RETURN_IF_ERROR(_jni_scanner_cls.new_object(env, _jni_scanner_constructor)
+                            .with_arg(batch_size)
+                            .with_arg(hashmap_object)
+                            .call(&_jni_scanner_obj));
+    return Status::OK();
+}
+
+void JniTableReader::_init_profile() {
+    if (_scanner_profile == nullptr) {
+        return;
+    }
+    // Every counter below is a child of a timer named after the connector class.
+    const std::string parent = _connector_name();
+    ADD_TIMER(_scanner_profile, parent);
+    _open_scanner_time = ADD_CHILD_TIMER(_scanner_profile, "OpenScannerTime", parent);
+    _java_scan_time = ADD_CHILD_TIMER(_scanner_profile, "JavaScanTime", parent);
+    _java_append_data_time = ADD_CHILD_TIMER(_scanner_profile, "JavaAppendDataTime", parent);
+    _java_create_vector_table_time =
+            ADD_CHILD_TIMER(_scanner_profile, "JavaCreateVectorTableTime", parent);
+    _fill_block_time = ADD_CHILD_TIMER(_scanner_profile, "FillBlockTime", parent);
+    _max_time_split_weight_counter = _scanner_profile->add_conditition_counter(
+            "MaxTimeSplitWeight", TUnit::UNIT,
+            [](int64_t lhs, int64_t rhs) { return rhs > lhs; }, parent);
+}
+
+std::string JniTableReader::_connector_name() const { // last '/'-separated segment of the class
+    const auto segments = split(connector_class(), "/");
+    return !segments.empty() ? segments.back() : connector_class();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/jni/jni_table_reader.h b/be/src/format_v2/jni/jni_table_reader.h
new file mode 100644
index 00000000000000..1317661e5880c2
--- /dev/null
+++ b/be/src/format_v2/jni/jni_table_reader.h
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "format/jni/jni_data_bridge.h"
+#include "format_v2/table_reader.h"
+#include "runtime/runtime_profile.h"
+#include "util/jni-util.h"
+
+namespace doris::format {
+
+class JniTableReader : public TableReader {
+public:
+    struct JniColumn {
+        std::string java_name;
+        // Position in the output block that receives the data read from the Java side.
+        size_t output_index = 0;
+        // Type the column has in the output block. It equals transfer_type unless the column
+        // is transferred in another form and cast back afterwards (e.g. a Bitmap column).
+        DataTypePtr output_type;
+        // Type used on the JNI boundary (e.g. String for Bitmap, cast back on the C++ side).
+        DataTypePtr transfer_type;
+        std::string replace_type = "not_replace";
+    };
+
+    ~JniTableReader() override = default;
+
+    Status init(TableReadOptions&& options) override;
+    Status prepare_split(const SplitReadOptions& options) override;
+    Status get_block(Block* block, bool* eos) override;
+    Status close() override;
+
+protected:
+    // Subclasses name the Java scanner class to instantiate.
+    virtual std::string connector_class() const = 0;
+    virtual Status validate_scan_range(const TFileRangeDesc&) const { return Status::OK(); }
+    // Subclasses fill the string params handed to the Java scanner's constructor.
+    virtual Status build_scanner_params(std::map<std::string, std::string>* params) const = 0;
+    // Subclasses can override this method when Java transfer types differ from output types.
+    virtual Status build_jni_columns(std::vector<JniColumn>* columns) const;
+    virtual Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows);
+    // Weight reported to the MaxTimeSplitWeight profile counter; -1 when the split has none.
+    virtual int64_t self_split_weight() const;
+    const std::vector<JniColumn>& jni_columns() const { return _jni_columns; }
+    TFileRangeDesc _current_range;
+
+private:
+    // Initialization
+    void _init_profile();
+    std::string _connector_name() const;
+    // Opening a split
+    Status _open_jni_scanner();
+    void _reset_split_state(JNIEnv* env);
+    void _prepare_jni_scanner_schema();
+    Status _register_jni_class_functions_once(JNIEnv* env);
+    Status _create_jni_scanner_object(JNIEnv* env, int batch_size);
+    // Reading batches
+    Status _get_next_jni_block(size_t* rows, bool* eof);
+    Status _fill_jni_block(JniDataBridge::TableMetaAddress& table_meta, size_t num_rows);
+
+    Status _close_jni_scanner();
+
+    std::map<std::string, std::string> _scanner_params;
+    std::vector<JniColumn> _jni_columns;
+    Block _jni_block_template;
+
+    bool _closed = false;
+    bool _scanner_opened = false;
+    bool _eof = false;
+
+    RuntimeProfile::Counter* _open_scanner_time = nullptr;
+    RuntimeProfile::Counter* _java_scan_time = nullptr;
+    RuntimeProfile::Counter* _java_append_data_time = nullptr;
+    RuntimeProfile::Counter* _java_create_vector_table_time = nullptr;
+    RuntimeProfile::Counter* _fill_block_time = nullptr;
+    RuntimeProfile::ConditionCounter* _max_time_split_weight_counter = nullptr;
+
+    int64_t _jni_scanner_open_watcher = 0;
+    int64_t _java_scan_watcher = 0;
+    int64_t _fill_block_watcher = 0;
+
+    Jni::GlobalClass _jni_scanner_cls;
+    Jni::GlobalObject _jni_scanner_obj;
+    Jni::MethodId _jni_scanner_constructor;
+    Jni::MethodId _jni_scanner_open;
+    Jni::MethodId _jni_scanner_get_append_data_time;
+    Jni::MethodId _jni_scanner_get_create_vector_table_time;
+    Jni::MethodId _jni_scanner_get_next_batch;
+    Jni::MethodId _jni_scanner_close;
+    Jni::MethodId _jni_scanner_release_column;
+    Jni::MethodId _jni_scanner_release_table;
+    Jni::MethodId _jni_scanner_get_statistics;
+    Jni::MethodId _jni_scanner_set_batch_size;
+};
+
+} // namespace doris::format
diff --git a/be/src/format_v2/jni/max_compute_jni_reader.cpp b/be/src/format_v2/jni/max_compute_jni_reader.cpp
new file mode 100644
index 00000000000000..a26e9e229b5d82
--- /dev/null
+++ b/be/src/format_v2/jni/max_compute_jni_reader.cpp
@@ -0,0 +1,149 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/max_compute_jni_reader.h"
+
+#include "core/block/block.h"
+#include "exprs/vexpr_context.h"
+
+namespace doris::format::max_compute {
+
+MaxComputeJniReader::MaxComputeJniReader(const doris::MaxComputeTableDescriptor* table_desc)
+        : _table_desc{table_desc} {} // not owned; checked for null in build_scanner_params()
+
+Status MaxComputeJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    // Checks run in order; the first unset or empty input decides the returned error.
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError("missing table_format_params for max compute jni reader");
+    }
+    if (!range.table_format_params.__isset.max_compute_params) {
+        return Status::InternalError("missing max_compute_params for max compute jni reader");
+    }
+    const auto& mc = range.table_format_params.max_compute_params;
+    if (!(mc.__isset.session_id && !mc.session_id.empty())) {
+        return Status::InternalError(
+                "missing session_id for max compute jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!(mc.__isset.table_batch_read_session && !mc.table_batch_read_session.empty())) {
+        return Status::InternalError(
+                "missing table_batch_read_session for max compute jni reader, possibly caused "
+                "by FE/BE protocol mismatch");
+    }
+    if (!range.__isset.start_offset) {
+        return Status::InternalError(
+                "missing start_offset for max compute jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!range.__isset.size) {
+        return Status::InternalError(
+                "missing size for max compute jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (_scan_params == nullptr) {
+        return Status::InternalError(
+                "missing scan params for max compute jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    return Status::OK();
+}
+
+std::string MaxComputeJniReader::connector_class() const { // '/'-separated Java class name
+    return std::string("org/apache/doris/maxcompute/MaxComputeJniScanner");
+}
+
+Status MaxComputeJniReader::build_scanner_params(std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    DORIS_CHECK(_table_desc != nullptr);
+    auto& out = *params;
+    // Assigning the table properties replaces any old content; the keys set below override them.
+    out = _table_desc->properties();
+    out["endpoint"] = _table_desc->endpoint();
+    out["quota"] = _table_desc->quota();
+    out["project"] = _table_desc->project();
+    out["table"] = _table_desc->table();
+
+    const auto& split = _current_range.table_format_params.max_compute_params;
+    out["session_id"] = split.session_id;
+    out["scan_serializer"] = split.table_batch_read_session;
+    out["start_offset"] = std::to_string(_current_range.start_offset);
+    out["split_size"] = std::to_string(_current_range.size);
+    out["connect_timeout"] = std::to_string(split.connect_timeout);
+    out["read_timeout"] = std::to_string(split.read_timeout);
+    out["retry_count"] = std::to_string(split.retry_times);
+    return Status::OK();
+}
+
+Status MaxComputeJniReader::build_jni_columns(
+        std::vector<format::JniTableReader::JniColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    columns->reserve(_projected_columns.size());
+    for (size_t output_idx = 0; output_idx < _projected_columns.size(); ++output_idx) {
+        const auto& projected = _projected_columns[output_idx];
+        // A partition key with a known value is left out of the JNI column list;
+        // finalize_jni_block() materializes it as a constant column instead.
+        const bool from_partition = projected.is_partition_key &&
+                                    find_partition_value(projected, _partition_values) != nullptr;
+        if (!from_partition) {
+            columns->push_back({.java_name = projected.name,
+                                .output_index = output_idx,
+                                .output_type = projected.type,
+                                .transfer_type = projected.type,
+                                .replace_type = "not_replace"});
+        }
+    }
+    return Status::OK();
+}
+
+Status MaxComputeJniReader::finalize_jni_block(Block* jni_block, Block* output_block,
+                                               size_t* rows) {
+    DORIS_CHECK(jni_block != nullptr);
+    DORIS_CHECK(output_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    const auto input_rows = *rows;
+
+    // Place each column returned through JNI at its position in the output block.
+    const auto& jni_cols = jni_columns();
+    DORIS_CHECK(jni_cols.size() == jni_block->columns());
+    for (size_t jni_pos = 0; jni_pos < jni_cols.size(); ++jni_pos) {
+        const auto& mapping = jni_cols[jni_pos];
+        DORIS_CHECK(mapping.output_index < output_block->columns());
+        output_block->get_by_position(mapping.output_index).type = mapping.output_type;
+        output_block->replace_by_position(mapping.output_index,
+                                          jni_block->get_by_position(jni_pos).column);
+    }
+    // Partition keys with a known value become constant columns of `input_rows` rows.
+    for (size_t out_pos = 0; out_pos < _projected_columns.size(); ++out_pos) {
+        const auto& projected = _projected_columns[out_pos];
+        const auto* value = find_partition_value(projected, _partition_values);
+        if (projected.is_partition_key && value != nullptr) {
+            output_block->get_by_position(out_pos).type = projected.type;
+            output_block->replace_by_position(
+                    out_pos, projected.type->create_column_const(input_rows, *value));
+        }
+    }
+    DORIS_CHECK(output_block->rows() == input_rows);
+    if (!_conjuncts.empty()) {
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_conjuncts, output_block, output_block->columns()));
+    }
+    *rows = output_block->rows();
+    return Status::OK();
+}
+
+} // namespace doris::format::max_compute
diff --git a/be/src/format_v2/jni/max_compute_jni_reader.h b/be/src/format_v2/jni/max_compute_jni_reader.h
new file mode 100644
index 00000000000000..8addce07988e4c
--- /dev/null
+++ b/be/src/format_v2/jni/max_compute_jni_reader.h
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris {
+class MaxComputeTableDescriptor;
+} // namespace doris
+
+namespace doris::format::max_compute {
+
+class MaxComputeJniReader final : public format::JniTableReader {
+public:
+    explicit MaxComputeJniReader(const doris::MaxComputeTableDescriptor* table_desc);
+    ~MaxComputeJniReader() override = default; // defaulted: _table_desc is never deleted here
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+    Status build_jni_columns(
+            std::vector<format::JniTableReader::JniColumn>* columns) const override;
+    Status finalize_jni_block(Block* jni_block, Block* output_block, size_t* rows) override;
+
+private:
+    const doris::MaxComputeTableDescriptor* _table_desc = nullptr; // non-owning table descriptor
+};
+
+} // namespace doris::format::max_compute
diff --git a/be/src/format_v2/jni/paimon_jni_reader.cpp b/be/src/format_v2/jni/paimon_jni_reader.cpp
new file mode 100644
index 00000000000000..c68cc7b952a5d5
--- /dev/null
+++ b/be/src/format_v2/jni/paimon_jni_reader.cpp
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/paimon_jni_reader.h"
+
+#include <string_view>
+
+namespace doris::format::paimon {
+namespace {
+
+constexpr std::string_view PAIMON_OPTION_PREFIX = "paimon."; // key prefix for paimon_options
+constexpr std::string_view HADOOP_OPTION_PREFIX = "hadoop."; // key prefix for properties
+
+} // namespace
+
+Status PaimonJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    // Split-level fields are checked first, then the scan-level fields held by _scan_params.
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError("missing table_format_params for paimon jni reader");
+    }
+    if (!range.table_format_params.__isset.paimon_params) {
+        return Status::InternalError("missing paimon_params for paimon jni reader");
+    }
+    const auto& desc = range.table_format_params.paimon_params;
+    if (!desc.__isset.paimon_split || desc.paimon_split.empty()) {
+        return Status::InternalError(
+                "missing paimon_split for paimon jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (!desc.__isset.reader_type || desc.reader_type != TPaimonReaderType::PAIMON_JNI) {
+        return Status::InternalError(
+                "invalid reader_type for paimon jni reader, possibly caused by FE/BE protocol "
+                "mismatch");
+    }
+    if (_scan_params == nullptr || !_scan_params->__isset.serialized_table ||
+        _scan_params->serialized_table.empty()) {
+        return Status::InternalError(
+                "missing serialized_table for paimon jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!_scan_params->__isset.paimon_predicate || _scan_params->paimon_predicate.empty()) {
+        return Status::InternalError(
+                "missing paimon_predicate for paimon jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    return Status::OK();
+}
+
+std::string PaimonJniReader::connector_class() const { // '/'-separated Java class name
+    return std::string("org/apache/doris/paimon/PaimonJniScanner");
+}
+
+Status PaimonJniReader::build_scanner_params(std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    DORIS_CHECK(_scan_params != nullptr);
+    auto& out = *params;
+    out.clear();
+    // The split comes from the current range; predicate and table come from the scan params.
+    out["paimon_split"] = _current_range.table_format_params.paimon_params.paimon_split;
+    out["paimon_predicate"] = _scan_params->paimon_predicate;
+    out["serialized_table"] = _scan_params->serialized_table;
+
+    if (_scan_params->__isset.paimon_options) {
+        for (const auto& [option, value] : _scan_params->paimon_options) {
+            out[std::string(PAIMON_OPTION_PREFIX) + option] = value;
+        }
+    }
+    if (_scan_params->__isset.properties) {
+        for (const auto& [property, value] : _scan_params->properties) {
+            out[std::string(HADOOP_OPTION_PREFIX) + property] = value;
+        }
+    }
+    // TODO: Remove legacy split-level paimon_predicate, paimon_options and hadoop_conf from thrift
+    // after all readers stop using them. Format V2 Paimon JNI consumes the scan-level fields
+    // planned by current FE and intentionally does not fall back to deprecated split-level fields.
+    return Status::OK();
+}
+
+} // namespace doris::format::paimon
diff --git a/be/src/format_v2/jni/paimon_jni_reader.h b/be/src/format_v2/jni/paimon_jni_reader.h
new file mode 100644
index 00000000000000..f789edb0b17bd0
--- /dev/null
+++ b/be/src/format_v2/jni/paimon_jni_reader.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::paimon {
+
+class PaimonJniReader final : public format::JniTableReader { // JniTableReader hooks for Paimon
+public:
+    ~PaimonJniReader() override = default;
+
+protected:
+    std::string connector_class() const override; // Java scanner class to use
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+};
+
+} // namespace doris::format::paimon
diff --git a/be/src/format_v2/jni/trino_connector_jni_reader.cpp b/be/src/format_v2/jni/trino_connector_jni_reader.cpp
new file mode 100644
index 00000000000000..11c9945c5dea16
--- /dev/null
+++ b/be/src/format_v2/jni/trino_connector_jni_reader.cpp
@@ -0,0 +1,141 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/jni/trino_connector_jni_reader.h"
+
+#include <string_view>
+
+#include "common/config.h"
+#include "util/jni-util.h"
+
+namespace doris::format::trino_connector {
+namespace {
+
+constexpr std::string_view TRINO_CONNECTOR_OPTION_PREFIX = "trino."; // prefix for option keys
+constexpr std::string_view TRINO_CONNECTOR_NAME = "connector.name"; // mandatory option key
+
+} // namespace
+
+Status TrinoConnectorJniReader::validate_scan_range(const TFileRangeDesc& range) const {
+    // Checks run in order; the first unset or empty split field decides the returned error.
+    if (!range.__isset.table_format_params) {
+        return Status::InternalError("missing table_format_params for trino connector jni reader");
+    }
+    if (!range.table_format_params.__isset.trino_connector_params) {
+        return Status::InternalError(
+                "missing trino_connector_params for trino connector jni reader");
+    }
+    const auto& split_params = range.table_format_params.trino_connector_params;
+    if (!split_params.__isset.catalog_name || split_params.catalog_name.empty()) {
+        return Status::InternalError(
+                "missing catalog_name for trino connector jni reader, possibly caused by FE/BE "
+                "protocol mismatch");
+    }
+    if (!split_params.__isset.trino_connector_options ||
+        !split_params.trino_connector_options.contains(std::string(TRINO_CONNECTOR_NAME))) {
+        return Status::InternalError(
+                "missing trino connector.name option for trino connector jni reader, possibly "
+                "caused by FE/BE protocol mismatch");
+    }
+    if (!split_params.__isset.trino_connector_split || split_params.trino_connector_split.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_split for trino connector jni reader, possibly caused "
+                "by FE/BE protocol mismatch");
+    }
+    if (!split_params.__isset.trino_connector_table_handle ||
+        split_params.trino_connector_table_handle.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_table_handle for trino connector jni reader, possibly "
+                "caused by FE/BE protocol mismatch");
+    }
+    if (!split_params.__isset.trino_connector_column_handles ||
+        split_params.trino_connector_column_handles.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_column_handles for trino connector jni reader, possibly "
+                "caused by FE/BE protocol mismatch");
+    }
+    if (!split_params.__isset.trino_connector_column_metadata ||
+        split_params.trino_connector_column_metadata.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_column_metadata for trino connector jni reader, possibly "
+                "caused by FE/BE protocol mismatch");
+    }
+    if (!split_params.__isset.trino_connector_trascation_handle ||
+        split_params.trino_connector_trascation_handle.empty()) {
+        return Status::InternalError(
+                "missing trino_connector_trascation_handle for trino connector jni reader, "
+                "possibly caused by FE/BE protocol mismatch");
+    }
+    return Status::OK();
+}
+
+Status TrinoConnectorJniReader::prepare_split(const format::SplitReadOptions& options) {
+    RETURN_IF_ERROR(validate_scan_range(options.current_range)); // fail before touching JNI
+    RETURN_IF_ERROR(_set_spi_plugins_dir());
+    return format::JniTableReader::prepare_split(options);
+}
+
+std::string TrinoConnectorJniReader::connector_class() const { // '/'-separated Java class name
+    return std::string("org/apache/doris/trinoconnector/TrinoConnectorJniScanner");
+}
+
+Status TrinoConnectorJniReader::build_scanner_params(
+        std::map<std::string, std::string>* params) const {
+    DORIS_CHECK(params != nullptr);
+    auto& out = *params;
+    out.clear();
+    const auto& split = _current_range.table_format_params.trino_connector_params;
+    out["catalog_name"] = split.catalog_name;
+    out["db_name"] = split.db_name;
+    out["table_name"] = split.table_name;
+    out["trino_connector_split"] = split.trino_connector_split;
+    out["trino_connector_table_handle"] = split.trino_connector_table_handle;
+    out["trino_connector_column_handles"] = split.trino_connector_column_handles;
+    out["trino_connector_column_metadata"] = split.trino_connector_column_metadata;
+    out["trino_connector_predicate"] = split.trino_connector_predicate;
+    out["trino_connector_trascation_handle"] = split.trino_connector_trascation_handle;
+    // Every connector option is forwarded under the "trino." key prefix.
+    for (const auto& [option, value] : split.trino_connector_options) {
+        out[std::string(TRINO_CONNECTOR_OPTION_PREFIX) + option] = value;
+    }
+    return Status::OK();
+}
+
+Status TrinoConnectorJniReader::_set_spi_plugins_dir() const {
+    // Calls the static Java method TrinoConnectorPluginLoader.setPluginsDir(String) via JNI.
+    JNIEnv* env = nullptr;
+    RETURN_IF_ERROR(Jni::Env::Get(&env));
+
+    constexpr const char* loader_class_name =
+            "org/apache/doris/trinoconnector/TrinoConnectorPluginLoader";
+    Jni::LocalClass loader_cls;
+    RETURN_IF_ERROR(Jni::Util::get_jni_scanner_class(env, loader_class_name, &loader_cls));
+
+    Jni::MethodId set_dir_method;
+    RETURN_IF_ERROR(loader_cls.get_static_method(env, "setPluginsDir", "(Ljava/lang/String;)V",
+                                                 &set_dir_method));
+
+    Jni::LocalString plugin_dir;
+    RETURN_IF_ERROR(Jni::LocalString::new_string(
+            env, doris::config::trino_connector_plugin_dir.c_str(), &plugin_dir));
+
+    return loader_cls.call_static_void_method(env, set_dir_method)
+            .with_arg(plugin_dir)
+            .call();
+}
+
+} // namespace doris::format::trino_connector
diff --git a/be/src/format_v2/jni/trino_connector_jni_reader.h b/be/src/format_v2/jni/trino_connector_jni_reader.h
new file mode 100644
index 00000000000000..a20c3a5f62ef96
--- /dev/null
+++ b/be/src/format_v2/jni/trino_connector_jni_reader.h
@@ -0,0 +1,44 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <map>
+#include <string>
+
+#include "common/status.h"
+#include "format_v2/jni/jni_table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::trino_connector {
+
+class TrinoConnectorJniReader final : public format::JniTableReader {
+public:
+    ~TrinoConnectorJniReader() override = default;
+
+    Status prepare_split(const format::SplitReadOptions& options) override; // also sets plugin dir
+
+protected:
+    std::string connector_class() const override;
+    Status validate_scan_range(const TFileRangeDesc& range) const override;
+    Status build_scanner_params(std::map<std::string, std::string>* params) const override;
+
+private:
+    Status _set_spi_plugins_dir() const; // invoked by prepare_split()
+};
+
+} // namespace doris::format::trino_connector
diff --git a/be/src/format_v2/json/json_reader.cpp b/be/src/format_v2/json/json_reader.cpp
new file mode 100644
index 00000000000000..f0219bb7d85345
--- /dev/null
+++ b/be/src/format_v2/json/json_reader.cpp
@@ -0,0 +1,1123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/json/json_reader.h"
+
+#include <rapidjson/document.h>
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+#include <map>
+#include <string_view>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_array.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "format/file_reader/new_plain_text_line_reader.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/materialized_reader_util.h"
+#include "io/file_factory.h"
+#include "io/fs/file_reader.h"
+#include "io/fs/stream_load_pipe.h"
+#include "io/fs/tracing_file_reader.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/decompressor.h"
+#include "util/slice.h"
+
+namespace doris::format::json {
+namespace {
+
+// Derives the type the JSON reader exposes in its file schema from a table slot type.
+// CHAR/VARCHAR turn into STRING, also when nested inside ARRAY/MAP/STRUCT; every other type is
+// kept. The nullability of `type` carries over to the result. A null `type` maps to nullptr.
+// Nested levels are converted by recursive calls.
+DataTypePtr json_file_type_from_slot_type(const DataTypePtr& type) {
+    if (type == nullptr) {
+        return nullptr;
+    }
+
+    // Text-like file readers expose CHAR/VARCHAR as STRING and let the table column mapper cast to
+    // the destination slot type. JSON follows the same file-schema convention so that v2 mapping
+    // behaves consistently across text formats.
+    const bool keep_nullable = type->is_nullable();
+    const auto bare_type = remove_nullable(type);
+    const auto primitive = bare_type->get_primitive_type();
+    DataTypePtr mapped;
+    if (primitive == TYPE_CHAR || primitive == TYPE_VARCHAR) {
+        // Both character types are exposed as STRING.
+        mapped = std::make_shared<DataTypeString>();
+    } else if (primitive == TYPE_ARRAY) {
+        // Rebuild the array around the converted element type.
+        const auto* as_array = assert_cast<const DataTypeArray*>(bare_type.get());
+        mapped = std::make_shared<DataTypeArray>(
+                json_file_type_from_slot_type(as_array->get_nested_type()));
+    } else if (primitive == TYPE_MAP) {
+        // Key and value types are converted independently.
+        const auto* as_map = assert_cast<const DataTypeMap*>(bare_type.get());
+        mapped = std::make_shared<DataTypeMap>(
+                json_file_type_from_slot_type(as_map->get_key_type()),
+                json_file_type_from_slot_type(as_map->get_value_type()));
+    } else if (primitive == TYPE_STRUCT) {
+        // Field names are kept; only the field types are converted.
+        const auto* as_struct = assert_cast<const DataTypeStruct*>(bare_type.get());
+        DataTypes file_fields;
+        file_fields.reserve(as_struct->get_elements().size());
+        for (const auto& field_type : as_struct->get_elements()) {
+            file_fields.push_back(json_file_type_from_slot_type(field_type));
+        }
+        mapped = std::make_shared<DataTypeStruct>(file_fields, as_struct->get_element_names());
+    } else {
+        // Every remaining type is used unchanged.
+        mapped = bare_type;
+    }
+
+    // Re-apply the nullability that remove_nullable() stripped from the input type.
+    return keep_nullable ? make_nullable(mapped) : mapped;
+}
+
+ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id);
+
+// Builds the child definitions of a complex type for the synthetic JSON file schema:
+//  - ARRAY  -> one child named "element" (local id 0);
+//  - MAP    -> children "key" (local id 0) and "value" (local id 1);
+//  - STRUCT -> one child per field, named after the field, local id = field position.
+// Any other type, and a null `type`, yield an empty list. Nested complex types are expanded
+// through synthetic_file_child(), which calls back into this function.
+std::vector<ColumnDefinition> synthesize_file_children_from_type(const DataTypePtr& type) {
+    std::vector<ColumnDefinition> result;
+    if (type == nullptr) {
+        return result;
+    }
+    const auto bare_type = remove_nullable(type);
+    const auto primitive = bare_type->get_primitive_type();
+    if (primitive == TYPE_ARRAY) {
+        const auto* as_array = assert_cast<const DataTypeArray*>(bare_type.get());
+        result.push_back(synthetic_file_child("element", as_array->get_nested_type(), 0));
+    } else if (primitive == TYPE_MAP) {
+        const auto* as_map = assert_cast<const DataTypeMap*>(bare_type.get());
+        result.push_back(synthetic_file_child("key", as_map->get_key_type(), 0));
+        result.push_back(synthetic_file_child("value", as_map->get_value_type(), 1));
+    } else if (primitive == TYPE_STRUCT) {
+        const auto* as_struct = assert_cast<const DataTypeStruct*>(bare_type.get());
+        const auto field_count = as_struct->get_elements().size();
+        result.reserve(field_count);
+        // Fields are emitted in declaration order; the position doubles as the local id.
+        for (size_t pos = 0; pos < field_count; ++pos) {
+            result.push_back(synthetic_file_child(as_struct->get_element_name(pos),
+                                                  as_struct->get_element(pos),
+                                                  cast_set<int32_t>(pos)));
+        }
+    }
+    return result;
+}
+
+ColumnDefinition synthetic_file_child(const std::string& name, DataTypePtr type, int32_t local_id) {
+    ColumnDefinition node;
+    node.name = name;
+    node.local_id = local_id;
+    node.identifier = Field::create_field<TYPE_STRING>(name); // identified by its name
+    node.type = std::move(type);
+    node.children = synthesize_file_children_from_type(node.type); // expands nested levels
+    return node;
+}
+
+std::string lower_key(std::string_view key) { // lowercase copy, one byte at a time
+    std::string lc(key);                       // unsigned char: ::tolower(negative) is UB
+    std::transform(lc.begin(), lc.end(), lc.begin(), [](unsigned char c) { return ::tolower(c); });
+    return lc;
+}
+
+} // namespace
+
+JsonReader::JsonReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                       std::unique_ptr<io::FileDescription>& file_description,
+                       std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                       const TFileScanRangeParams* scan_params, const TFileRangeDesc& range,
+                       const std::vector<SlotDescriptor*>& file_slot_descs,
+                       TFileCompressType::type range_compress_type,
+                       std::optional<TUniqueId> stream_load_id)
+        : FileReader(system_properties, file_description, std::move(io_ctx), profile),
+          _scan_params(scan_params), // may be null here; init() rejects a null pointer
+          _range(range),
+          _source_file_slot_descs(file_slot_descs),
+          _range_compress_type(range_compress_type), // UNKNOWN -> init() uses the scan-level type
+          _stream_load_id(std::move(stream_load_id)) {}
+
+JsonReader::~JsonReader() {
+    static_cast<void>(close()); // the result of close() is explicitly discarded
+}
+
+Status JsonReader::init(RuntimeState* state) {
+    _runtime_state = state;
+    if (_scan_params == nullptr) {
+        return Status::InvalidArgument("JSON v2 reader requires scan params");
+    }
+    if (_file_description == nullptr) {
+        return Status::InvalidArgument("JSON v2 reader requires file description");
+    }
+    if (_runtime_state == nullptr) {
+        return Status::InvalidArgument("JSON v2 reader requires runtime state");
+    }
+    if (!_scan_params->__isset.file_attributes) {
+        return Status::InvalidArgument("JSON v2 reader requires file attributes");
+    }
+
+    // Copy the JSON options out of the thrift attributes. Unset options fall back to "\n" for the
+    // line delimiter, empty strings for the paths, and false for the boolean switches.
+    const auto& attrs = _scan_params->file_attributes;
+    const bool has_line_delimiter = attrs.__isset.text_params &&
+                                    attrs.text_params.__isset.line_delimiter;
+    _line_delimiter = has_line_delimiter ? attrs.text_params.line_delimiter : "\n";
+    _line_delimiter_length = _line_delimiter.size();
+    _jsonpaths = attrs.__isset.jsonpaths ? attrs.jsonpaths : "";
+    _json_root = attrs.__isset.json_root ? attrs.json_root : "";
+    _read_json_by_line = attrs.__isset.read_json_by_line && attrs.read_json_by_line;
+    _strip_outer_array = attrs.__isset.strip_outer_array && attrs.strip_outer_array;
+    _num_as_string = attrs.__isset.num_as_string && attrs.num_as_string;
+    _fuzzy_parse = attrs.__isset.fuzzy_parse && attrs.fuzzy_parse;
+    _openx_json_ignore_malformed = attrs.__isset.openx_json_ignore_malformed &&
+                                   attrs.openx_json_ignore_malformed;
+    _is_hive_table = _range.table_format_params.table_format_type == "hive";
+    _file_compress_type = _range_compress_type == TFileCompressType::UNKNOWN
+                                  ? _scan_params->compress_type
+                                  : _range_compress_type;
+
+    _source_serdes = create_data_type_serdes(_source_file_slot_descs);
+    _file_schema.clear();
+    _file_schema.reserve(_source_file_slot_descs.size());
+    // JSON has no physical footer schema. The FE file slots are therefore the authoritative schema
+    // for both field names and source local ids.
+    for (size_t slot_idx = 0; slot_idx < _source_file_slot_descs.size(); ++slot_idx) {
+        const auto* slot_desc = _source_file_slot_descs[slot_idx];
+        DORIS_CHECK(slot_desc != nullptr);
+        ColumnDefinition column;
+        column.name = slot_desc->col_name();
+        column.local_id = cast_set<int32_t>(slot_idx);
+        column.identifier = Field::create_field<TYPE_STRING>(slot_desc->col_name());
+        column.type = json_file_type_from_slot_type(slot_desc->get_data_type_ptr());
+        column.children = synthesize_file_children_from_type(column.type);
+        _file_schema.push_back(std::move(column));
+    }
+    _eof = false;
+    return Status::OK();
+}
+
+// Copies the schema built by init() into the caller-provided vector.
+// Returns InvalidArgument when the output pointer is null.
+Status JsonReader::get_schema(std::vector<ColumnDefinition>* file_schema) const {
+    if (file_schema != nullptr) {
+        *file_schema = _file_schema;
+        return Status::OK();
+    }
+    return Status::InvalidArgument("JSON v2 file_schema is null");
+}
+
+// Creates the column mapper for this reader; JSON always uses a MaterializedColumnMapper built
+// from the given options.
+std::unique_ptr<TableColumnMapper> JsonReader::create_column_mapper(
+        TableColumnMapperOptions options) const {
+    std::unique_ptr<TableColumnMapper> mapper =
+            std::make_unique<MaterializedColumnMapper>(std::move(options));
+    return mapper;
+}
+
+// Opens the reader for one scan request.
+//
+// Resolves the requested columns, builds the slot-name -> block-position lookup, opens the
+// physical file reader and the decompressor (plus a line reader in read_json_by_line mode), parses
+// jsonpaths/json_root, and allocates the simdjson parser together with its padded input buffer.
+// Any failing step aborts the open and its Status is returned.
+Status JsonReader::open(std::shared_ptr<FileScanRequest> request) {
+    RETURN_IF_ERROR(FileReader::open(std::move(request)));
+    DORIS_CHECK(_request != nullptr);
+    RETURN_IF_ERROR(_build_requested_columns(*_request, &_requested_columns));
+    _slot_name_to_index.clear();
+    _slot_name_to_index.reserve(_requested_columns.size());
+    for (size_t idx = 0; idx < _requested_columns.size(); ++idx) {
+        auto name = _requested_columns[idx].slot_desc->col_name();
+        // Hive tables are indexed by lower_key(name); other tables use the column name as-is.
+        _slot_name_to_index.emplace(_is_hive_table ? lower_key(name) : name, idx);
+    }
+    _previous_positions.clear();
+    _reader_range = _json_range();
+    // The line reader is built on top of the physical reader and the decompressor, so both must
+    // exist before it is created.
+    RETURN_IF_ERROR(_open_file_reader());
+    RETURN_IF_ERROR(_create_decompressor());
+    if (_read_json_by_line) {
+        RETURN_IF_ERROR(_create_line_reader());
+    }
+    RETURN_IF_ERROR(_parse_jsonpath_and_json_root());
+    _json_parser = std::make_unique<simdjson::ondemand::parser>();
+    _padding_buffer.resize(_padded_size);
+    // Reset per-open read state.
+    _reader_eof = false;
+    _single_document_read = false;
+    _eof = false;
+    return Status::OK();
+}
+
+// Fills `file_block` with rows decoded from consecutive JSON documents.
+//
+// Documents are consumed until the block holds batch_size rows, reaches
+// preferred_block_size_bytes, or the underlying reader reports EOF. `*rows` is set to the block's
+// row count and then handed to _apply_filters; `*eof` becomes true only when the reader is
+// exhausted and this call reports zero rows.
+// Returns InternalError if the reader has not been opened.
+Status JsonReader::get_block(Block* file_block, size_t* rows, bool* eof) {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    if (_json_parser == nullptr || _physical_file_reader == nullptr) {
+        return Status::InternalError("JSON v2 reader is not open");
+    }
+
+    const auto batch_size = _runtime_state->batch_size();
+    const auto max_block_bytes = _runtime_state->preferred_block_size_bytes();
+    *rows = 0;
+    *eof = false;
+
+    while (file_block->rows() < batch_size && !_reader_eof &&
+           file_block->bytes() < max_block_bytes) {
+        // In line mode, _create_line_reader sets _skip_first_line for a split that does not start
+        // at offset 0; the first line read is then a partial line and is discarded once.
+        if (_read_json_by_line && _skip_first_line) {
+            size_t skipped_size = 0;
+            const uint8_t* skipped_line = nullptr;
+            RETURN_IF_ERROR(_line_reader->read_line(&skipped_line, &skipped_size, &_reader_eof,
+                                                    _io_ctx.get()));
+            _skip_first_line = false;
+            continue;
+        }
+
+        const size_t original_rows = file_block->rows();
+        size_t size = 0;
+        bool is_empty_row = false;
+        Status st = Status::OK();
+        // Pipeline per document: read + parse -> extract the value to materialize -> append rows.
+        // simdjson_error exceptions raised along the way are converted into a DataQualityError.
+        try {
+            st = _parse_next_json(&size, &_reader_eof);
+            if (st.ok() && !_reader_eof) {
+                if (size == 0) {
+                    is_empty_row = true;
+                } else {
+                    st = _extract_json_value(size, &_reader_eof, &is_empty_row);
+                }
+            }
+            if (st.ok() && !_reader_eof && !is_empty_row) {
+                st = _append_rows_from_current_value(file_block, &is_empty_row, &_reader_eof);
+            }
+        } catch (simdjson::simdjson_error& e) {
+            st = Status::DataQualityError("Parse json data failed. code: {}, error info: {}",
+                                          e.error(), e.what());
+        }
+        if (!st.ok()) {
+            // NOTE(review): _handle_json_error is defined outside this view; it presumably decides
+            // whether the error is fatal or the document is skipped - confirm there.
+            RETURN_IF_ERROR(_handle_json_error(st, file_block, original_rows, &is_empty_row));
+        }
+        // An ignored or empty JSON object can produce no row. Avoid spinning forever on a document
+        // that was consumed but produced no materialized value.
+        if (!is_empty_row && file_block->rows() == original_rows) {
+            break;
+        }
+    }
+
+    *rows = file_block->rows();
+    RETURN_IF_ERROR(_apply_filters(file_block, rows));
+    _reader_statistics.read_rows += *rows;
+    // EOF is only surfaced together with an empty result, so rows produced by the final documents
+    // are returned first.
+    *eof = _reader_eof && *rows == 0;
+    _eof = *eof;
+    return Status::OK();
+}
+
+// Releases everything acquired by open(): the line reader, the simdjson parser, the decompressor
+// and the file reader handles, and clears the per-open column lookup and caches.
+// Always returns OK; the destructor calls it and discards the result.
+Status JsonReader::close() {
+    // The line reader was created with a raw pointer to the decompressor, so it is torn down
+    // before the decompressor is reset.
+    if (_line_reader != nullptr) {
+        _line_reader->close();
+        _line_reader.reset();
+    }
+    _json_parser.reset();
+    _decompressor.reset();
+    _physical_file_reader.reset();
+    _tracing_file_reader.reset();
+    _file_reader.reset();
+    _requested_columns.clear();
+    _slot_name_to_index.clear();
+    _previous_positions.clear();
+    _cached_string_values.clear();
+    return Status::OK();
+}
+
+// Translates `request.local_positions` into a vector of RequestedColumn indexed by block position.
+//
+// Returns InvalidArgument when a local column id falls outside the source slot list, when a block
+// position is not smaller than the number of requested columns, or when some block position is
+// left without a column. `*columns` is cleared up front and only filled on success.
+Status JsonReader::_build_requested_columns(const FileScanRequest& request,
+                                            std::vector<RequestedColumn>* columns) const {
+    DORIS_CHECK(columns != nullptr);
+    columns->clear();
+    // FileScanRequest stores a map from file-local id to output block position. Materialization is
+    // position-driven, so normalize it into a dense vector ordered by block position while keeping
+    // the original source index for jsonpaths.
+    std::vector<RequestedColumn> by_position(request.local_positions.size());
+    for (const auto& [file_column_id, block_position] : request.local_positions) {
+        if (file_column_id.value() < 0 ||
+            static_cast<size_t>(file_column_id.value()) >= _source_file_slot_descs.size()) {
+            return Status::InvalidArgument("JSON v2 request references unknown local column id {}",
+                                           file_column_id.value());
+        }
+        if (block_position.value() >= by_position.size()) {
+            return Status::InvalidArgument("JSON v2 request has invalid block position {}",
+                                           block_position.value());
+        }
+        const auto source_index = cast_set<size_t>(file_column_id.value());
+        RequestedColumn requested_column;
+        requested_column.file_column_id = file_column_id;
+        requested_column.block_position = block_position;
+        requested_column.source_index = source_index;
+        requested_column.slot_desc = _source_file_slot_descs[source_index];
+        requested_column.serde = _source_serdes[source_index];
+        by_position[block_position.value()] = std::move(requested_column);
+    }
+    // Entries never assigned above still hold a default-constructed RequestedColumn, which is
+    // detected here through its file_column_id not being valid.
+    for (size_t pos = 0; pos < by_position.size(); ++pos) {
+        if (!by_position[pos].file_column_id.is_valid()) {
+            return Status::InvalidArgument("JSON v2 request misses block position {}", pos);
+        }
+    }
+    *columns = std::move(by_position);
+    return Status::OK();
+}
+
+// Returns a copy of the original range descriptor whose path, start offset and size (plus file
+// size, fs name, cache admission, compression type and load id where available) are taken from
+// the file description and the constructor arguments.
+TFileRangeDesc JsonReader::_json_range() const {
+    const auto& description = *_file_description;
+    TFileRangeDesc resolved = _range;
+
+    resolved.__set_path(description.path);
+    resolved.__set_start_offset(description.range_start_offset);
+    resolved.__set_size(description.range_size);
+    // A negative file size is not forwarded.
+    if (description.file_size >= 0) {
+        resolved.__set_file_size(description.file_size);
+    }
+    if (!description.fs_name.empty()) {
+        resolved.__set_fs_name(description.fs_name);
+    }
+    resolved.__set_file_cache_admission(description.file_cache_admission);
+
+    // Constructor-provided overrides are applied only when they carry a value.
+    const bool has_range_compression = _range_compress_type != TFileCompressType::UNKNOWN;
+    if (has_range_compression) {
+        resolved.__set_compress_type(_range_compress_type);
+    }
+    if (_stream_load_id.has_value()) {
+        resolved.__set_load_id(_stream_load_id.value());
+    }
+    return resolved;
+}
+
+// Creates the physical file reader for the resolved range.
+//
+// FILE_STREAM scans read from the stream-load pipe identified by the load id (InvalidArgument if
+// the id is absent). All other file types go through FileFactory; the resulting reader is wrapped
+// in a TracingFileReader when the IO context carries file_reader_stats.
+Status JsonReader::_open_file_reader() {
+    // A non-zero range start is moved back by one byte; _create_line_reader pairs this with
+    // discarding the first partial line.
+    _current_offset = _reader_range.start_offset;
+    if (_current_offset != 0) {
+        --_current_offset;
+    }
+    if (_scan_params->file_type == TFileType::FILE_STREAM) {
+        if (!_stream_load_id.has_value()) {
+            return Status::InvalidArgument("JSON v2 stream reader requires load id");
+        }
+        RETURN_IF_ERROR(FileFactory::create_pipe_reader(*_stream_load_id, &_physical_file_reader,
+                                                        _runtime_state, /*need_schema=*/false));
+    } else {
+        // mtime falls back to 0 when the range carries no modification time.
+        _file_description->mtime =
+                _reader_range.__isset.modification_time ? _reader_range.modification_time : 0;
+        auto reader_options = FileFactory::get_reader_options(_runtime_state->query_options(),
+                                                              *_file_description);
+        auto file_reader = DORIS_TRY(FileFactory::create_file_reader(
+                *_system_properties, *_file_description, reader_options, _profile));
+        _physical_file_reader =
+                _io_ctx && _io_ctx->file_reader_stats
+                        ? std::make_shared<io::TracingFileReader>(std::move(file_reader),
+                                                                  _io_ctx->file_reader_stats)
+                        : file_reader;
+    }
+    // All three handles refer to the same reader object.
+    _file_reader = _physical_file_reader;
+    _tracing_file_reader = _physical_file_reader;
+    return Status::OK();
+}
+
+// Instantiates the decompressor that matches the compression type resolved in init().
+Status JsonReader::_create_decompressor() {
+    RETURN_IF_ERROR(Decompressor::create_decompressor(_file_compress_type, &_decompressor));
+    return Status::OK();
+}
+
+// Builds the plain-text line reader used in read_json_by_line mode on top of the physical reader
+// and the decompressor.
+Status JsonReader::_create_line_reader() {
+    // Start one byte earlier and discard the first partial line, matching split semantics used
+    // by text readers. The extra byte is accounted for in the length handed to the line reader.
+    const bool starts_mid_file = _reader_range.start_offset != 0;
+    const int64_t read_length = starts_mid_file ? _reader_range.size + 1 : _reader_range.size;
+    _skip_first_line = starts_mid_file;
+
+    auto line_reader_ctx = std::make_shared<PlainTextLineReaderCtx>(
+            _line_delimiter, _line_delimiter_length, false);
+    _line_reader = NewPlainTextLineReader::create_unique(_profile, _physical_file_reader,
+                                                         _decompressor.get(),
+                                                         std::move(line_reader_ctx), read_length,
+                                                         _current_offset);
+    return Status::OK();
+}
+
+// Parses the configured `jsonpaths` (expected to be a JSON array of path strings) and `json_root`
+// into _parsed_jsonpaths and _parsed_json_root. Both outputs are cleared first, so empty options
+// leave them empty.
+// Returns InvalidJsonPath when `jsonpaths` is not valid JSON, is not an array, or contains a
+// non-string element.
+Status JsonReader::_parse_jsonpath_and_json_root() {
+    // A bare "$" is rewritten to "$." before it is handed to the path parser; every other path is
+    // passed through unchanged.
+    const auto normalize_root_path = [](std::string path) {
+        if (path.size() == 1 && path[0] == '$') {
+            path.insert(1, ".");
+        }
+        return path;
+    };
+
+    _parsed_jsonpaths.clear();
+    _parsed_json_root.clear();
+    if (!_jsonpaths.empty()) {
+        rapidjson::Document jsonpaths_doc;
+        if (jsonpaths_doc.Parse(_jsonpaths.c_str(), _jsonpaths.length()).HasParseError() ||
+            !jsonpaths_doc.IsArray()) {
+            return Status::InvalidJsonPath("Invalid json path: {}", _jsonpaths);
+        }
+        // Index with rapidjson's own unsigned size type to avoid a signed/unsigned comparison
+        // against Size().
+        const rapidjson::SizeType path_count = jsonpaths_doc.Size();
+        for (rapidjson::SizeType i = 0; i < path_count; ++i) {
+            const rapidjson::Value& path = jsonpaths_doc[i];
+            if (!path.IsString()) {
+                return Status::InvalidJsonPath("Invalid json path: {}", _jsonpaths);
+            }
+            const std::string json_path = normalize_root_path(path.GetString());
+            std::vector<JsonPath> parsed_paths;
+            JsonFunctions::parse_json_paths(json_path, &parsed_paths);
+            _parsed_jsonpaths.push_back(std::move(parsed_paths));
+        }
+    }
+    if (!_json_root.empty()) {
+        const std::string json_root = normalize_root_path(_json_root);
+        JsonFunctions::parse_json_paths(json_root, &_parsed_json_root);
+    }
+    return Status::OK();
+}
+
+// Loads the next raw JSON document into _document_buffer.
+//
+// With a line reader, each call returns one line. Otherwise the whole split is read exactly once
+// (from the stream-load pipe for FILE_STREAM, via read_at for other file types) and every later
+// call reports eof. `*size` receives the number of bytes loaded; `*eof` is set when nothing more
+// can be read.
+Status JsonReader::_read_one_document(size_t* size, bool* eof) {
+    DORIS_CHECK(size != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    *size = 0;
+    *eof = false;
+    if (_line_reader != nullptr) {
+        const uint8_t* line = nullptr;
+        RETURN_IF_ERROR(_line_reader->read_line(&line, size, eof, _io_ctx.get()));
+        if (*eof) {
+            return Status::OK();
+        }
+        // Copy the line into the reader-owned buffer.
+        _document_buffer.assign(reinterpret_cast<const char*>(line), *size);
+        return Status::OK();
+    }
+    // Non-line mode treats the split as one JSON document. This supports a single object or an
+    // array with strip_outer_array=true.
+    if (_single_document_read) {
+        *eof = true;
+        return Status::OK();
+    }
+    _single_document_read = true;
+    if (_scan_params->file_type == TFileType::FILE_STREAM) {
+        return _read_one_document_from_pipe(size);
+    }
+
+    // When the range size is not positive, fall back to reading from the current offset to the end
+    // of the file, provided the file size is known.
+    auto read_size = _reader_range.size;
+    if (read_size <= 0 && _reader_range.__isset.file_size) {
+        read_size = _reader_range.file_size - _current_offset;
+    }
+    if (read_size <= 0) {
+        *eof = true;
+        return Status::OK();
+    }
+    _document_buffer.resize(cast_set<size_t>(read_size));
+    Slice result(_document_buffer.data(), _document_buffer.size());
+    RETURN_IF_ERROR(_physical_file_reader->read_at(_current_offset, result, size, _io_ctx.get()));
+    // Shrink to the number of bytes actually read.
+    _document_buffer.resize(*size);
+    if (*size == 0) {
+        *eof = true;
+    }
+    return Status::OK();
+}
+
+// Reads one document from the stream-load pipe into _document_buffer and reports its length in
+// `*read_size`.
+//
+// For a non-chunked transfer a single message is taken as the document. For a chunked transfer,
+// messages keep being appended until the pipe returns an empty one.
+// Returns InternalError when the physical reader is not a StreamLoadPipe.
+Status JsonReader::_read_one_document_from_pipe(size_t* read_size) {
+    auto* stream_load_pipe = dynamic_cast<io::StreamLoadPipe*>(_physical_file_reader.get());
+    if (stream_load_pipe == nullptr) {
+        return Status::InternalError("JSON v2 stream reader requires StreamLoadPipe");
+    }
+    DorisUniqueBufferPtr<uint8_t> file_buf;
+    RETURN_IF_ERROR(stream_load_pipe->read_one_message(&file_buf, read_size));
+    _document_buffer.assign(reinterpret_cast<const char*>(file_buf.get()), *read_size);
+    if (!stream_load_pipe->is_chunked_transfer()) {
+        return Status::OK();
+    }
+
+    // Chunked transfer: drain the remaining messages; a zero-sized message ends the document.
+    while (true) {
+        DorisUniqueBufferPtr<uint8_t> next_buf;
+        size_t next_size = 0;
+        RETURN_IF_ERROR(stream_load_pipe->read_one_message(&next_buf, &next_size));
+        if (next_size == 0) {
+            break;
+        }
+        _document_buffer.append(reinterpret_cast<const char*>(next_buf.get()), next_size);
+        *read_size += next_size;
+    }
+    return Status::OK();
+}
+
+// Reads the next document and starts a simdjson on-demand iteration over it, storing the result
+// in _original_json_doc.
+//
+// `*size` receives the document length after an optional leading UTF-8 BOM has been removed.
+// Returns OK without parsing when the reader hit eof or the document is empty, and
+// DataQualityError when simdjson cannot start iterating the document.
+Status JsonReader::_parse_next_json(size_t* size, bool* eof) {
+    RETURN_IF_ERROR(_read_one_document(size, eof));
+    if (*eof || *size == 0) {
+        return Status::OK();
+    }
+    // Drop a leading UTF-8 byte order mark (EF BB BF).
+    if (*size >= 3 && static_cast<unsigned char>(_document_buffer[0]) == 0xEF &&
+        static_cast<unsigned char>(_document_buffer[1]) == 0xBB &&
+        static_cast<unsigned char>(_document_buffer[2]) == 0xBF) {
+        _document_buffer.erase(0, 3);
+        *size -= 3;
+    }
+    // Grow the padded buffer so it holds the document plus SIMDJSON_PADDING extra bytes; the
+    // buffer is never shrunk here.
+    if (*size + simdjson::SIMDJSON_PADDING > _padded_size) {
+        _padded_size = *size + simdjson::SIMDJSON_PADDING;
+        _padding_buffer.resize(_padded_size);
+    }
+    // Ondemand values reference the input buffer. Keep the padded bytes in a member buffer until the
+    // current document is fully materialized.
+    std::memcpy(_padding_buffer.data(), _document_buffer.data(), *size);
+    _original_doc_size = *size;
+    const auto error =
+            _json_parser->iterate(std::string_view(_padding_buffer.data(), *size), _padded_size)
+                    .get(_original_json_doc);
+    if (error != simdjson::error_code::SUCCESS) {
+        return Status::DataQualityError(
+                "Parse json data for JsonDoc failed. code: {}, error info: {}", error,
+                simdjson::error_message(error));
+    }
+    return Status::OK();
+}
+
+// Selects the JSON value that rows are materialized from and stores it in _json_value.
+//
+// The parsed document must be an object or an array. For object documents a configured json_root
+// is applied here; otherwise the document itself is used. The selected value is then checked
+// against strip_outer_array: arrays require it to be true, non-arrays require it to be false.
+// `*is_empty_row` is set when there is nothing to read (size 0 / eof) or when jsonpaths are used
+// on an empty outer array.
+// Returns DataQualityError for type mismatches and json_root extraction failures. The `.value()`
+// accessors may throw simdjson_error, which get_block() catches.
+Status JsonReader::_extract_json_value(size_t size, bool* eof, bool* is_empty_row) {
+    DORIS_CHECK(eof != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    *is_empty_row = false;
+    if (size == 0 || *eof) {
+        *is_empty_row = true;
+        return Status::OK();
+    }
+    auto type_res = _original_json_doc.type();
+    if (type_res.error() != simdjson::error_code::SUCCESS) {
+        return Status::DataQualityError(
+                "Parse json data for JsonDoc failed. code: {}, error info: {}", type_res.error(),
+                simdjson::error_message(type_res.error()));
+    }
+    const auto type = type_res.value();
+    if (type != simdjson::ondemand::json_type::object &&
+        type != simdjson::ondemand::json_type::array) {
+        return Status::DataQualityError("Not an json object or json array");
+    }
+    _parsed_from_json_root = false;
+    if (!_parsed_json_root.empty() && type == simdjson::ondemand::json_type::object) {
+        // In object mode json_root can be applied once here. In outer-array mode each array element
+        // needs its own root extraction, which is handled while iterating the array.
+        simdjson::ondemand::object object = _original_json_doc;
+        Status st = JsonFunctions::extract_from_object(object, _parsed_json_root, &_json_value);
+        if (!st.ok()) {
+            return Status::DataQualityError("{}", st.to_string());
+        }
+        _parsed_from_json_root = true;
+    } else {
+        _json_value = _original_json_doc;
+    }
+
+    // The shape of the selected value must agree with the strip_outer_array setting.
+    const auto value_type = _json_value.type().value();
+    if (value_type == simdjson::ondemand::json_type::array && !_strip_outer_array) {
+        return Status::DataQualityError(
+                "JSON data is array-object, `strip_outer_array` must be TRUE.");
+    }
+    if (value_type != simdjson::ondemand::json_type::array && _strip_outer_array) {
+        return Status::DataQualityError(
+                "JSON data is not an array-object, `strip_outer_array` must be FALSE.");
+    }
+    if (!_parsed_jsonpaths.empty() && _strip_outer_array &&
+        _json_value.count_elements().value() == 0) {
+        *is_empty_row = true;
+    }
+    return Status::OK();
+}
+
+// Dispatches the current JSON value to the matching materialization routine: key-based matching
+// when no jsonpaths are configured, otherwise jsonpath extraction over either the elements of the
+// outer array (strip_outer_array) or the single object.
+Status JsonReader::_append_rows_from_current_value(Block* block, bool* is_empty_row, bool* eof) {
+    const bool uses_jsonpaths = !_parsed_jsonpaths.empty();
+    if (!uses_jsonpaths) {
+        return _append_simple_json_rows(block, is_empty_row, eof);
+    }
+    return _strip_outer_array ? _append_flat_array_jsonpath_rows(block, is_empty_row, eof)
+                              : _append_nested_jsonpath_row(block, is_empty_row, eof);
+}
+
+// Materializes rows by matching object keys to requested columns (no jsonpaths configured).
+//
+// An array value yields one row per element, an object value yields a single row.
+// `*is_empty_row` is set to true when the array has no elements or when
+// _set_column_values_from_object reports a row as not valid (processing stops at that row);
+// otherwise it is set to false.
+Status JsonReader::_append_simple_json_rows(Block* block, bool* is_empty_row, bool* eof) {
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    bool valid = false;
+    if (_json_value.type().value() == simdjson::ondemand::json_type::array) {
+        _array = _json_value.get_array();
+        if (_array.count_elements() == 0) {
+            *is_empty_row = true;
+            return Status::OK();
+        }
+        _array_iter = _array.begin();
+        while (_array_iter != _array.end()) {
+            // Each element is read as an object; simdjson raises an error (caught in get_block)
+            // if it is not one.
+            simdjson::ondemand::object object_value = (*_array_iter).get_object();
+            RETURN_IF_ERROR(_set_column_values_from_object(&object_value, block, &valid));
+            ++_array_iter;
+            if (!valid) {
+                *is_empty_row = true;
+                return Status::OK();
+            }
+        }
+    } else {
+        simdjson::ondemand::object object_value = _json_value.get_object();
+        RETURN_IF_ERROR(_set_column_values_from_object(&object_value, block, &valid));
+        if (!valid) {
+            *is_empty_row = true;
+            return Status::OK();
+        }
+    }
+    *is_empty_row = false;
+    return Status::OK();
+}
+
+// Materializes one row per element of the outer array using the configured jsonpaths.
+//
+// When json_root was not already applied to the whole document, it is evaluated per element;
+// elements where the root is not found, or where it does not resolve to an object, are skipped.
+// `*is_empty_row` reports whether the block gained no rows at all.
+Status JsonReader::_append_flat_array_jsonpath_rows(Block* block, bool* is_empty_row, bool* eof) {
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    const size_t original_rows = block->rows();
+    // Per-row validity from _write_columns_by_jsonpath is not inspected here; emptiness is derived
+    // from the block's row count instead.
+    bool valid = true;
+    _array = _json_value.get_array();
+    _array_iter = _array.begin();
+    while (_array_iter != _array.end()) {
+        simdjson::ondemand::object object_value = (*_array_iter).get_object();
+        if (!_parsed_from_json_root && !_parsed_json_root.empty()) {
+            // For strip_outer_array, json_root is evaluated against each element. Elements without
+            // the requested root do not produce rows, matching the load reader behavior.
+            simdjson::ondemand::value rooted_value;
+            Status st = JsonFunctions::extract_from_object(object_value, _parsed_json_root,
+                                                           &rooted_value);
+            if (!st.ok()) {
+                if (st.is<ErrorCode::NOT_FOUND>()) {
+                    ++_array_iter;
+                    continue;
+                }
+                return st;
+            }
+            if (rooted_value.type().value() != simdjson::ondemand::json_type::object) {
+                ++_array_iter;
+                continue;
+            }
+            object_value = rooted_value.get_object();
+        }
+        RETURN_IF_ERROR(_write_columns_by_jsonpath(&object_value, block, &valid));
+        ++_array_iter;
+    }
+    *is_empty_row = block->rows() == original_rows;
+    return Status::OK();
+}
+
+// Materializes a single row from the current JSON object using the configured jsonpaths.
+// Returns DataQualityError when the current value is not an object; `*is_empty_row` is the
+// negation of the validity reported by _write_columns_by_jsonpath.
+Status JsonReader::_append_nested_jsonpath_row(Block* block, bool* is_empty_row, bool* eof) {
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    DORIS_CHECK(eof != nullptr);
+
+    const bool is_object =
+            _json_value.type().value() == simdjson::ondemand::json_type::object;
+    if (!is_object) {
+        return Status::DataQualityError("Not object item");
+    }
+
+    bool row_written = true;
+    simdjson::ondemand::object root_object = _json_value.get_object();
+    RETURN_IF_ERROR(_write_columns_by_jsonpath(&root_object, block, &row_written));
+    *is_empty_row = !row_written;
+    return Status::OK();
+}
+
+// Appends one row to `block` from a JSON object by matching each key to a requested column.
+//
+// Keys that map to no column are skipped. For a repeated key, Hive tables keep the last value
+// while other tables keep the first one. Requested columns that the object does not provide are
+// filled through _fill_missing_column.
+//
+// On return `*valid` tells whether a complete row was appended. Whenever it is false, every
+// column is rolled back to the row count observed on entry, so the block never keeps a partially
+// written row. Errors from the column writers are propagated unchanged.
+Status JsonReader::_set_column_values_from_object(simdjson::ondemand::object* object_value,
+                                                  Block* block, bool* valid) {
+    DORIS_CHECK(object_value != nullptr);
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(valid != nullptr);
+    std::vector<bool> seen_columns(block->columns(), false);
+    const size_t cur_row_count = block->rows();
+    size_t key_index = 0;
+
+    for (auto field : *object_value) {
+        std::string_view key = field.unescaped_key().value();
+        const size_t column_index = _column_index(key, key_index++);
+        if (column_index == static_cast<size_t>(-1)) {
+            continue;
+        }
+        if (seen_columns[column_index]) {
+            if (_is_hive_table) {
+                // Hive JSON keeps the last duplicate key ignoring case. The earlier value has
+                // already been appended, so remove it before writing the replacement.
+                _pop_back_last_inserted_value(block, column_index);
+            } else {
+                continue;
+            }
+        }
+        simdjson::ondemand::value value = field.value().value();
+        const auto& requested = _requested_columns[column_index];
+        auto* column_ptr = block->get_by_position(column_index).column->assert_mutable().get();
+        RETURN_IF_ERROR(_write_data_to_column<false>(
+                value, requested.slot_desc->get_data_type_ptr(), column_ptr,
+                requested.slot_desc->col_name(), requested.serde, valid));
+        if (!*valid) {
+            // Values written for earlier keys of this object must not survive a rejected row;
+            // roll back exactly like the missing-column path below does.
+            _truncate_block_to_rows(block, cur_row_count);
+            return Status::OK();
+        }
+        seen_columns[column_index] = true;
+    }
+
+    // Fill every requested column the object did not provide.
+    for (size_t i = 0; i < _requested_columns.size(); ++i) {
+        if (seen_columns[i]) {
+            continue;
+        }
+        auto* column_ptr = block->get_by_position(i).column->assert_mutable().get();
+        RETURN_IF_ERROR(_fill_missing_column(_requested_columns[i], column_ptr, valid));
+        if (!*valid) {
+            _truncate_block_to_rows(block, cur_row_count);
+            return Status::OK();
+        }
+    }
+    *valid = true;
+    return Status::OK();
+}
+
+// Appends one row to `block` by evaluating each requested column's jsonpath against
+// `object_value`.
+//
+// Per column: a root path stores the original document text; a path that is absent (no jsonpath
+// for the column's source index, or NOT_FOUND from extraction) is filled through
+// _fill_missing_column; any other match is written via _write_data_to_column.
+// `*valid` is false, and the block is rolled back to its row count on entry, when a column
+// writer rejects the row or when no column received a real value. Extraction errors other than
+// NOT_FOUND are returned as-is.
+Status JsonReader::_write_columns_by_jsonpath(simdjson::ondemand::object* object_value,
+                                              Block* block, bool* valid) {
+    DORIS_CHECK(object_value != nullptr);
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(valid != nullptr);
+    bool has_valid_value = false;
+    const size_t cur_row_count = block->rows();
+    // The string cache used by _write_data_to_column<true> is scoped to a single row.
+    _cached_string_values.clear();
+
+    for (size_t i = 0; i < _requested_columns.size(); ++i) {
+        const auto& requested = _requested_columns[i];
+        auto* column_ptr = block->get_by_position(i).column->assert_mutable().get();
+        simdjson::ondemand::value json_value;
+        Status st = Status::OK();
+        // jsonpaths are indexed by the column's source index, not by its block position.
+        if (requested.source_index < _parsed_jsonpaths.size()) {
+            st = JsonFunctions::extract_from_object(
+                    *object_value, _parsed_jsonpaths[requested.source_index], &json_value);
+            if (!st.ok() && !st.is<ErrorCode::NOT_FOUND>()) {
+                return st;
+            }
+        }
+        if (_is_root_path_for_column(requested)) {
+            // A root jsonpath means "materialize the whole current JSON document" instead of a
+            // field under it. Use the original bytes so callers receive the same document text.
+            if (is_column_nullable(*column_ptr)) {
+                auto* nullable_column = assert_cast<ColumnNullable*>(column_ptr);
+                nullable_column->get_null_map_data().push_back(0);
+                auto* column_string =
+                        assert_cast<ColumnString*>(nullable_column->get_nested_column_ptr().get());
+                column_string->insert_data(_padding_buffer.data(), _original_doc_size);
+            } else {
+                auto* column_string = assert_cast<ColumnString*>(column_ptr);
+                column_string->insert_data(_padding_buffer.data(), _original_doc_size);
+            }
+            has_valid_value = true;
+        } else if (requested.source_index >= _parsed_jsonpaths.size() ||
+                   st.is<ErrorCode::NOT_FOUND>()) {
+            // A filled-in missing column does not count as a real value for has_valid_value.
+            RETURN_IF_ERROR(_fill_missing_column(requested, column_ptr, valid));
+            if (!*valid) {
+                _truncate_block_to_rows(block, cur_row_count);
+                return Status::OK();
+            }
+        } else {
+            RETURN_IF_ERROR(_write_data_to_column<true>(
+                    json_value, requested.slot_desc->get_data_type_ptr(), column_ptr,
+                    requested.slot_desc->col_name(), requested.serde, valid));
+            if (!*valid) {
+                _truncate_block_to_rows(block, cur_row_count);
+                return Status::OK();
+            }
+            has_valid_value = true;
+        }
+    }
+
+    if (!has_valid_value) {
+        // jsonpaths can legally match nothing. Roll the row back so an all-missing path set does
+        // not create a synthetic row of nulls.
+        _truncate_block_to_rows(block, cur_row_count);
+        *valid = false;
+        return Status::OK();
+    }
+    *valid = true;
+    return Status::OK();
+}
+
+template <bool use_string_cache>
+Status JsonReader::_write_data_to_column(simdjson::ondemand::value& value,
+                                         const DataTypePtr& type_desc, IColumn* column_ptr,
+                                         const std::string& column_name,
+                                         const DataTypeSerDeSPtr& serde, bool* valid) {
+    ColumnNullable* nullable_column = nullptr;
+    IColumn* data_column_ptr = column_ptr;
+    DataTypeSerDeSPtr data_serde = serde;
+    const auto value_type = value.type().value();
+
+    if (is_column_nullable(*column_ptr)) {
+        nullable_column = assert_cast<ColumnNullable*>(column_ptr);
+        data_column_ptr = nullable_column->get_nested_column().get_ptr().get();
+        if (type_desc->is_nullable()) {
+            data_serde = serde->get_nested_serdes()[0];
+        }
+        if (value_type == simdjson::ondemand::json_type::null) {
+            nullable_column->insert_default();
+            *valid = true;
+            return Status::OK();
+        }
+    } else if (value_type == simdjson::ondemand::json_type::null) {
+        return Status::DataQualityError("Json value is null, but the column `{}` is not nullable.",
+                                        column_name);
+    }
+
+    const auto primitive_type = type_desc->get_primitive_type();
+    if (!is_complex_type(primitive_type)) {
+        if (value_type == simdjson::ondemand::json_type::string) {
+            std::string_view value_string;
+            if constexpr (use_string_cache) {
+                const auto cache_key = value.raw_json().value();
+                if (_cached_string_values.contains(cache_key)) {
+                    value_string = _cached_string_values[cache_key];
+                } else {
+                    value_string = value.get_string();
+                    _cached_string_values.emplace(cache_key, value_string);
+                }
+            } else {
+                value_string = value.get_string();
+            }
+            Slice slice {value_string.data(), value_string.size()};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+        } else if (value_type == simdjson::ondemand::json_type::boolean) {
+            const char* str_value = value.get_bool() ? "1" : "0";
+            Slice slice {str_value, 1};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+        } else {
+            std::string_view json_str = simdjson::to_json_string(value);
+            Slice slice {json_str.data(), json_str.size()};
+            RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice,
+                                                                       _serde_options));
+        }
+    } else if (primitive_type == TYPE_STRUCT) {
+        if (value_type != simdjson::ondemand::json_type::object) {
+            return Status::DataQualityError(
+                    "Json value isn't object, but the column `{}` is struct.", column_name);
+        }
+        const auto* type_struct =
+                assert_cast<const DataTypeStruct*>(remove_nullable(type_desc).get());
+        auto* struct_column_ptr = assert_cast<ColumnStruct*>(data_column_ptr);
+        const auto sub_serdes = data_serde->get_nested_serdes();
+        std::map<std::string, size_t> sub_col_name_to_idx;
+        for (size_t sub_col_idx = 0; sub_col_idx < type_struct->get_elements().size();
+             ++sub_col_idx) {
+            sub_col_name_to_idx.emplace(lower_key(type_struct->get_element_name(sub_col_idx)),
+                                        sub_col_idx);
+        }
+        std::vector<bool> has_value(type_struct->get_elements().size(), false);
+        simdjson::ondemand::object struct_value = value.get_object();
+        for (auto sub : struct_value) {
+            const auto sub_key = lower_key(sub.unescaped_key().value());
+            const auto it = sub_col_name_to_idx.find(sub_key);
+            if (it == sub_col_name_to_idx.end()) {
+                continue;
+            }
+            const auto sub_column_idx = it->second;
+            auto sub_column_ptr = struct_column_ptr->get_column(sub_column_idx).get_ptr();
+            if (has_value[sub_column_idx]) {
+                // Struct fields follow Hive-style duplicate handling: the last matching nested key
+                // wins. Remove the earlier nested value before appending the new one.
+                sub_column_ptr->pop_back(1);
+            }
+            has_value[sub_column_idx] = true;
+            auto sub_value = sub.value().value();
+            RETURN_IF_ERROR(_write_data_to_column<use_string_cache>(
+                    sub_value, type_struct->get_element(sub_column_idx), sub_column_ptr.get(),
+                    column_name + "." + sub_key, sub_serdes[sub_column_idx], valid));
+        }
+        for (size_t sub_col_idx = 0; sub_col_idx < type_struct->get_elements().size();
+             ++sub_col_idx) {
+            if (has_value[sub_col_idx]) {
+                continue;
+            }
+            auto sub_column_ptr = struct_column_ptr->get_column(sub_col_idx).get_ptr();
+            if (!is_column_nullable(*sub_column_ptr)) {
+                return Status::DataQualityError(
+                        "Json file structColumn miss field {} and this column isn't nullable.",
+                        column_name + "." + type_struct->get_element_name(sub_col_idx));
+            }
+            sub_column_ptr->insert_default();
+        }
+    } else if (primitive_type == TYPE_MAP) {
+        if (value_type != simdjson::ondemand::json_type::object) {
+            return Status::DataQualityError("Json value isn't object, but the column `{}` is map.",
+                                            column_name);
+        }
+        const auto* map_type = assert_cast<const DataTypeMap*>(remove_nullable(type_desc).get());
+        auto* map_column_ptr = assert_cast<ColumnMap*>(data_column_ptr);
+        const auto sub_serdes = data_serde->get_nested_serdes();
+        size_t field_count = 0;
+        simdjson::ondemand::object object_value = value.get_object();
+        for (auto member_value : object_value) {
+            auto* key_column = map_column_ptr->get_keys_ptr()->assert_mutable()->get_ptr().get();
+            auto key_serde = sub_serdes[0];
+            if (is_column_nullable(*key_column)) {
+                auto* nullable_key = assert_cast<ColumnNullable*>(key_column);
+                nullable_key->get_null_map_data().push_back(0);
+                key_column = nullable_key->get_nested_column().get_ptr().get();
+                if (map_type->get_key_type()->is_nullable()) {
+                    key_serde = key_serde->get_nested_serdes()[0];
+                }
+            }
+            std::string_view key_view = member_value.unescaped_key().value();
+            Slice key_slice(key_view.data(), key_view.size());
+            RETURN_IF_ERROR(key_serde->deserialize_one_cell_from_json(*key_column, key_slice,
+                                                                      _serde_options));
+            simdjson::ondemand::value field_value = member_value.value().value();
+            RETURN_IF_ERROR(_write_data_to_column<use_string_cache>(
+                    field_value, map_type->get_value_type(),
+                    map_column_ptr->get_values_ptr()->assert_mutable()->get_ptr().get(),
+                    column_name + ".value", sub_serdes[1], valid));
+            ++field_count;
+        }
+        auto& offsets = map_column_ptr->get_offsets();
+        offsets.emplace_back(offsets.back() + field_count);
+    } else if (primitive_type == TYPE_ARRAY) {
+        if (value_type != simdjson::ondemand::json_type::array) {
+            return Status::DataQualityError("Json value isn't array, but the column `{}` is array.",
+                                            column_name);
+        }
+        const auto* array_type =
+                assert_cast<const DataTypeArray*>(remove_nullable(type_desc).get());
+        auto* array_column_ptr = assert_cast<ColumnArray*>(data_column_ptr);
+        const auto sub_serdes = data_serde->get_nested_serdes();
+        size_t field_count = 0;
+        simdjson::ondemand::array array_value = value.get_array();
+        for (simdjson::ondemand::value sub_value : array_value) {
+            RETURN_IF_ERROR(_write_data_to_column<use_string_cache>(
+                    sub_value, array_type->get_nested_type(),
+                    array_column_ptr->get_data().get_ptr().get(), column_name + ".element",
+                    sub_serdes[0], valid));
+            ++field_count;
+        }
+        auto& offsets = array_column_ptr->get_offsets();
+        offsets.emplace_back(offsets.back() + field_count);
+    } else {
+        return Status::InternalError("Not support JSON value to complex column");
+    }
+
+    if (nullable_column && value_type != simdjson::ondemand::json_type::null) {
+        nullable_column->get_null_map_data().push_back(0);
+    }
+    *valid = true;
+    return Status::OK();
+}
+
+Status JsonReader::_fill_missing_column(const RequestedColumn& column, IColumn* column_ptr,
+                                        bool* valid) {
+    // A column missing from the document can only be filled when its slot is nullable.
+    if (!column.slot_desc->is_nullable()) {
+        return Status::DataQualityError(
+                "The column `{}` is not nullable, but it's not found in jsondata.",
+                column.slot_desc->col_name());
+    }
+    assert_cast<ColumnNullable*>(column_ptr)->insert_default();
+    *valid = true;
+    return Status::OK();
+}
+
+Status JsonReader::_append_null_for_malformed_json(Block* block) {
+    DORIS_CHECK(block != nullptr);
+    for (int pos = 0; pos < block->columns(); ++pos) { // one default row per column
+        auto& entry = block->get_by_position(pos);
+        if (!is_column_nullable(*entry.column)) {
+            return Status::DataQualityError("malformed json, but the column `{}` is not nullable.",
+                                            entry.column->get_name());
+        }
+        auto writable = IColumn::mutate(std::move(entry.column));
+        assert_cast<ColumnNullable*>(writable.get())->insert_default();
+        entry.column = std::move(writable);
+    }
+    return Status::OK();
+}
+
+Status JsonReader::_handle_json_error(const Status& status, Block* block, size_t original_rows,
+                                      bool* is_empty_row) {
+    DORIS_CHECK(block != nullptr);
+    DORIS_CHECK(is_empty_row != nullptr);
+    // The failing document may already have written into some columns. Roll every column back
+    // to `original_rows` first, so both outcomes below start from a consistent block: either
+    // the error is surfaced as-is, or a single all-default row replaces the malformed document.
+    _truncate_block_to_rows(block, original_rows);
+    if (!_openx_json_ignore_malformed || !status.is<ErrorCode::DATA_QUALITY_ERROR>()) {
+        return status;
+    }
+    RETURN_IF_ERROR(_append_null_for_malformed_json(block));
+    *is_empty_row = false;
+    return Status::OK();
+}
+
+Status JsonReader::_apply_filters(Block* file_block, size_t* rows) { // no filter profile passed
+    return apply_materialized_reader_filters(_request.get(), _io_ctx.get(), file_block, rows);
+}
+
+void JsonReader::_truncate_block_to_rows(Block* block, size_t num_rows) {
+    DORIS_CHECK(block != nullptr);
+    for (int pos = 0; pos < block->columns(); ++pos) { // trim every column independently
+        auto& slot = block->get_by_position(pos).column;
+        auto writable = IColumn::mutate(std::move(slot));
+        if (const size_t have = writable->size(); have > num_rows) {
+            writable->pop_back(have - num_rows);
+        }
+        slot = std::move(writable);
+    }
+}
+
+void JsonReader::_pop_back_last_inserted_value(Block* block, size_t column_index) {
+    DORIS_CHECK(block != nullptr);
+    auto& target = block->get_by_position(column_index).column;
+    auto writable = IColumn::mutate(std::move(target)); // mutable handle, stored back below
+    writable->pop_back(1);
+    target = std::move(writable);
+}
+
+size_t JsonReader::_column_index(std::string_view key, size_t key_index) {
+    std::string hive_key;
+    std::string_view lookup_key = key;
+    if (_is_hive_table) {
+        hive_key = lower_key(key);
+        lookup_key = hive_key;
+    }
+    if (key_index < _previous_positions.size()) {
+        // Most JSON lines share field order, so try the position cached for this key ordinal
+        // before the hash lookup. Compare by reference: this runs for every key of every row.
+        const auto previous = _previous_positions[key_index];
+        if (previous < _requested_columns.size()) {
+            const auto& name = _requested_columns[previous].slot_desc->col_name();
+            if (_is_hive_table ? lower_key(name) == lookup_key : name == lookup_key) {
+                return previous;
+            }
+        }
+    }
+    const auto it = _slot_name_to_index.find(std::string(lookup_key));
+    if (it == _slot_name_to_index.end()) {
+        return static_cast<size_t>(-1); // unknown key; the cached position is left untouched
+    }
+    if (key_index >= _previous_positions.size()) {
+        _previous_positions.resize(key_index + 1, static_cast<size_t>(-1));
+    }
+    _previous_positions[key_index] = it->second;
+    return it->second;
+}
+
+bool JsonReader::_is_root_path_for_column(const RequestedColumn& column) const {
+    const size_t idx = column.source_index; // slots past the parsed jsonpaths are never root
+    return idx < _parsed_jsonpaths.size() && JsonFunctions::is_root_path(_parsed_jsonpaths[idx]);
+}
+
+} // namespace doris::format::json
diff --git a/be/src/format_v2/json/json_reader.h b/be/src/format_v2/json/json_reader.h
new file mode 100644
index 00000000000000..52cdfad6728d64
--- /dev/null
+++ b/be/src/format_v2/json/json_reader.h
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <simdjson/simdjson.h> // IWYU pragma: keep
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "core/custom_allocator.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "exprs/json_functions.h"
+#include "format_v2/file_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_profile.h"
+
+namespace doris {
+class Decompressor;
+class LineReader;
+class SlotDescriptor;
+class IColumn;
+} // namespace doris
+
+namespace doris::format::json {
+
+// FileScannerV2 JSON reader.
+//
+// JSON files do not carry an embedded physical schema. The v2 table layer still needs a
+// file-local schema and FileScanRequest contract, so this reader exposes FE-provided file slots as
+// v2 file-local columns and performs JSON parsing/materialization directly in the v2 path.
+class JsonReader final : public FileReader {
+public:
+    // `file_slot_descs` is the FE-planned file schema. JSON has no physical schema, so the reader
+    // exposes these slots as synthetic file-local columns and materializes only the columns
+    // requested by FileScanRequest.
+    JsonReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+               std::unique_ptr<io::FileDescription>& file_description,
+               std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+               const TFileScanRangeParams* scan_params, const TFileRangeDesc& range,
+               const std::vector<SlotDescriptor*>& file_slot_descs,
+               TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN,
+               std::optional<TUniqueId> stream_load_id = std::nullopt);
+    ~JsonReader() override;
+
+    // Initializes scan attributes and builds the synthetic schema from FE slots.
+    Status init(RuntimeState* state) override;
+    Status get_schema(std::vector<ColumnDefinition>* file_schema) const override;
+    std::unique_ptr<TableColumnMapper> create_column_mapper(
+            TableColumnMapperOptions options) const override;
+    // Opens the underlying file or stream and binds requested local column ids to output block
+    // positions. After this call, `get_block` can be called until it returns eof.
+    Status open(std::shared_ptr<FileScanRequest> request) override;
+    // Appends rows into `file_block` according to the FileScanRequest order. The block must already
+    // contain columns matching the requested positions.
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override;
+    Status close() override;
+
+private:
+    // A requested column keeps both identities:
+    // - `source_index`: index in FE file slots, used for jsonpaths and SerDe lookup.
+    // - `block_position`: index in the caller's output block, used for materialization.
+    struct RequestedColumn {
+        LocalColumnId file_column_id = LocalColumnId::invalid();
+        LocalIndex block_position;
+        size_t source_index = 0;
+        SlotDescriptor* slot_desc = nullptr;
+        DataTypeSerDeSPtr serde;
+    };
+
+    Status _build_requested_columns(const FileScanRequest& request,
+                                    std::vector<RequestedColumn>* columns) const;
+    // Reconciles TableReader's split/range descriptor with FileReader's concrete file description.
+    TFileRangeDesc _json_range() const;
+    Status _open_file_reader();
+    Status _create_decompressor();
+    Status _create_line_reader();
+    Status _parse_jsonpath_and_json_root();
+    // Reads one logical JSON document: one line for JSON Lines, or the whole range/pipe payload for
+    // single-document mode.
+    Status _read_one_document(size_t* size, bool* eof);
+    Status _read_one_document_from_pipe(size_t* read_size);
+    // Moves the logical document into a simdjson-padded buffer and creates an ondemand document.
+    Status _parse_next_json(size_t* size, bool* eof);
+    // Applies json_root and validates the object/array shape required by strip_outer_array.
+    Status _extract_json_value(size_t size, bool* eof, bool* is_empty_row);
+    Status _append_rows_from_current_value(Block* block, bool* is_empty_row, bool* eof);
+    Status _append_simple_json_rows(Block* block, bool* is_empty_row, bool* eof);
+    Status _append_flat_array_jsonpath_rows(Block* block, bool* is_empty_row, bool* eof);
+    Status _append_nested_jsonpath_row(Block* block, bool* is_empty_row, bool* eof);
+    Status _set_column_values_from_object(simdjson::ondemand::object* object_value, Block* block,
+                                          bool* valid);
+    Status _write_columns_by_jsonpath(simdjson::ondemand::object* object_value, Block* block,
+                                      bool* valid);
+    template <bool use_string_cache>
+    Status _write_data_to_column(simdjson::ondemand::value& value, const DataTypePtr& type_desc,
+                                 IColumn* column_ptr, const std::string& column_name,
+                                 const DataTypeSerDeSPtr& serde, bool* valid);
+    Status _fill_missing_column(const RequestedColumn& column, IColumn* column_ptr, bool* valid);
+    Status _append_null_for_malformed_json(Block* block);
+    Status _handle_json_error(const Status& status, Block* block, size_t original_rows,
+                              bool* is_empty_row);
+    Status _apply_filters(Block* file_block, size_t* rows);
+    void _truncate_block_to_rows(Block* block, size_t num_rows); // pops rows beyond num_rows
+    void _pop_back_last_inserted_value(Block* block, size_t column_index); // pops one value
+    size_t _column_index(std::string_view key, size_t key_index); // size_t(-1) if unknown
+    bool _is_root_path_for_column(const RequestedColumn& column) const;
+
+    const TFileScanRangeParams* _scan_params = nullptr;
+    TFileRangeDesc _range;
+    TFileRangeDesc _reader_range;
+    std::vector<SlotDescriptor*> _source_file_slot_descs;
+    DataTypeSerDeSPtrs _source_serdes;
+    std::vector<ColumnDefinition> _file_schema;
+    RuntimeState* _runtime_state = nullptr;
+    TFileCompressType::type _range_compress_type = TFileCompressType::UNKNOWN;
+    std::optional<TUniqueId> _stream_load_id;
+    std::vector<RequestedColumn> _requested_columns;
+    std::unordered_map<std::string, size_t> _slot_name_to_index; // used by _column_index()
+    std::vector<size_t> _previous_positions; // per-key-ordinal cache for _column_index()
+
+    io::FileReaderSPtr _physical_file_reader;
+    std::unique_ptr<Decompressor> _decompressor;
+    std::unique_ptr<LineReader> _line_reader;
+    int64_t _current_offset = 0;
+    bool _reader_eof = false;
+    bool _skip_first_line = false;
+    bool _single_document_read = false;
+
+    std::string _line_delimiter;
+    size_t _line_delimiter_length = 0;
+    std::string _jsonpaths;
+    std::string _json_root;
+    bool _read_json_by_line = false;
+    bool _strip_outer_array = false;
+    bool _num_as_string = false;
+    bool _fuzzy_parse = false;
+    bool _is_hive_table = false;
+    bool _openx_json_ignore_malformed = false;
+    TFileCompressType::type _file_compress_type = TFileCompressType::UNKNOWN;
+
+    std::vector<std::vector<JsonPath>> _parsed_jsonpaths;
+    std::vector<JsonPath> _parsed_json_root;
+    bool _parsed_from_json_root = false;
+    DataTypeSerDe::FormatOptions _serde_options;
+
+    // simdjson ondemand values point into `_padding_buffer`, so the buffer must outlive all values
+    // created from the current document.
+    std::unique_ptr<simdjson::ondemand::parser> _json_parser;
+    simdjson::ondemand::document _original_json_doc;
+    simdjson::ondemand::value _json_value;
+    simdjson::ondemand::array _array;
+    simdjson::ondemand::array_iterator _array_iter;
+    std::string _document_buffer;
+    std::string _padding_buffer;
+    size_t _original_doc_size = 0;
+    size_t _padded_size = 1024 * 1024 * 8 + simdjson::SIMDJSON_PADDING; // 8 MiB + padding
+    std::unordered_map<std::string_view, std::string_view> _cached_string_values;
+};
+
+} // namespace doris::format::json
diff --git a/be/src/format_v2/materialized_reader_util.cpp b/be/src/format_v2/materialized_reader_util.cpp
new file mode 100644
index 00000000000000..a7e533633510c4
--- /dev/null
+++ b/be/src/format_v2/materialized_reader_util.cpp
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/materialized_reader_util.h"
+
+#include <utility>
+
+#include "core/block/block.h"
+#include "core/data_type/data_type_nullable.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/file_reader.h"
+#include "io/io_common.h"
+
+namespace doris::format {
+namespace {
+
+void update_counter(RuntimeProfile::Counter* counter, int64_t value) {
+    if (counter != nullptr) { // profile counters are optional; a null one is skipped
+        COUNTER_UPDATE(counter, value);
+    }
+}
+
+} // namespace
+
+ColumnPtr make_column_nullable_if_needed(ColumnPtr column, const DataTypePtr& target_type) {
+    const bool target_nullable = target_type != nullptr && target_type->is_nullable();
+    if (!target_nullable || column.get() == nullptr || column->is_nullable()) {
+        return column;
+    }
+    return make_nullable(std::move(column)); // only a non-nullable column gets wrapped
+}
+
+Status apply_materialized_reader_filters(const FileScanRequest* request, io::IOContext* io_ctx,
+                                         Block* file_block, size_t* rows,
+                                         const MaterializedReaderFilterProfile* profile) {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    const size_t rows_before_filter = *rows; // row count reported by the caller
+    size_t rows_after_delete_filter = rows_before_filter;
+    if (request != nullptr && rows_before_filter > 0 && !request->delete_conjuncts.empty()) {
+        {
+            SCOPED_TIMER(profile == nullptr ? nullptr : profile->delete_conjunct_filter_time);
+            RETURN_IF_ERROR(VExprContext::filter_block(request->delete_conjuncts, file_block,
+                                                       file_block->columns()));
+        }
+        rows_after_delete_filter = // a block without columns keeps the incoming count
+                file_block->columns() == 0 ? rows_before_filter : file_block->rows();
+        if (profile != nullptr) {
+            update_counter(profile->rows_filtered_by_delete_conjunct,
+                           rows_before_filter - rows_after_delete_filter);
+        }
+    }
+
+    size_t rows_after_filter = rows_after_delete_filter;
+    if (request != nullptr && rows_after_delete_filter > 0 && !request->conjuncts.empty()) {
+        {
+            SCOPED_TIMER(profile == nullptr ? nullptr : profile->conjunct_filter_time);
+            RETURN_IF_ERROR(VExprContext::filter_block(request->conjuncts, file_block,
+                                                       file_block->columns()));
+        }
+        rows_after_filter = // same column-less fallback as the delete stage
+                file_block->columns() == 0 ? rows_after_delete_filter : file_block->rows();
+        const auto rows_filtered_by_conjunct = rows_after_delete_filter - rows_after_filter;
+        if (profile != nullptr) {
+            update_counter(profile->rows_filtered_by_conjunct, rows_filtered_by_conjunct);
+        }
+        if (io_ctx != nullptr) { // rows removed by delete_conjuncts are not counted here
+            io_ctx->predicate_filtered_rows += rows_filtered_by_conjunct;
+        }
+    }
+    *rows = rows_after_filter; // rows surviving both filter stages
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/materialized_reader_util.h b/be/src/format_v2/materialized_reader_util.h
new file mode 100644
index 00000000000000..2fb1383dfb9569
--- /dev/null
+++ b/be/src/format_v2/materialized_reader_util.h
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "common/status.h"
+#include "core/column/column.h"
+#include "core/data_type/data_type.h"
+#include "runtime/runtime_profile.h"
+
+namespace doris {
+class Block;
+
+namespace io {
+struct IOContext;
+} // namespace io
+
+namespace format {
+struct FileScanRequest;
+
+// Shared helpers for FileReader implementations that deserialize or build already materialized
+// Doris columns and then hand those columns to TableReader for final mapping.
+ColumnPtr make_column_nullable_if_needed(ColumnPtr column, const DataTypePtr& target_type);
+
+// Optional profile counters for text-like readers. Native/JSON do not expose per-reader filter
+// counters today, so they call apply_materialized_reader_filters() without this struct.
+struct MaterializedReaderFilterProfile {
+    RuntimeProfile::Counter* delete_conjunct_filter_time = nullptr; // timer, delete stage
+    RuntimeProfile::Counter* conjunct_filter_time = nullptr;        // timer, conjunct stage
+    RuntimeProfile::Counter* rows_filtered_by_delete_conjunct = nullptr; // rows removed
+    RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr;        // rows removed
+};
+
+// Applies file-local filters in the same order used by FileScannerV2 readers:
+// 1. delete_conjuncts remove rows that should not be visible to the scan output;
+// 2. conjuncts apply ordinary file-local predicates.
+//
+// Only ordinary conjunct filtering contributes to IOContext::predicate_filtered_rows. This matches
+// the previous JSON/Text/CSV behavior and keeps scanner accounting separate from delete filtering.
+// When `profile` is provided, the helper also updates text-reader timer and row counters so CSV
+// and Hive text keep their existing observability after sharing this implementation.
+Status apply_materialized_reader_filters(const FileScanRequest* request, io::IOContext* io_ctx,
+                                         Block* file_block, size_t* rows,
+                                         const MaterializedReaderFilterProfile* profile = nullptr);
+
+} // namespace format
+} // namespace doris
diff --git a/be/src/format_v2/native/native_reader.cpp b/be/src/format_v2/native/native_reader.cpp
new file mode 100644
index 00000000000000..2a0a89f80adc8d
--- /dev/null
+++ b/be/src/format_v2/native/native_reader.cpp
@@ -0,0 +1,311 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/native/native_reader.h"
+
+#include <cstring>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "core/block/block.h"
+#include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/data_type_nullable.h"
+#include "format/native/native_format.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/materialized_reader_util.h"
+#include "io/file_factory.h"
+#include "io/fs/tracing_file_reader.h"
+#include "runtime/runtime_state.h"
+#include "util/slice.h"
+
+namespace doris::format::native {
+namespace {
+
+Status parse_native_pblock(const std::string& buffer, const std::string& path, PBlock* pblock) {
+    DORIS_CHECK(pblock != nullptr);
+    const int byte_count = cast_set<int>(buffer.size()); // ParseFromArray takes an int length
+    const bool parsed = pblock->ParseFromArray(buffer.data(), byte_count);
+    return parsed ? Status::OK()
+                  : Status::InternalError("Failed to parse native PBlock from file {}", path);
+}
+
+} // namespace
+
+NativeReader::NativeReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                           std::unique_ptr<io::FileDescription>& file_description, // forwarded
+                           std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile)
+        : FileReader(system_properties, file_description, std::move(io_ctx), profile) {}
+
+NativeReader::~NativeReader() {
+    static_cast<void>(close()); // the Status from close() is explicitly discarded
+}
+
+Status NativeReader::init(RuntimeState* state) {
+    _runtime_state = state;
+    if (_file_description == nullptr) {
+        return Status::InvalidArgument("Native v2 reader requires file description");
+    }
+    // Base init runs first; the header check below DORIS_CHECKs `_tracing_file_reader`.
+    RETURN_IF_ERROR(FileReader::init(state));
+    return _validate_and_consume_header();
+}
+
+Status NativeReader::get_schema(std::vector<ColumnDefinition>* file_schema) const {
+    if (file_schema == nullptr) {
+        return Status::InvalidArgument("Native v2 file_schema is null");
+    }
+    RETURN_IF_ERROR(_ensure_schema_loaded()); // may read the first block to learn the schema
+    *file_schema = _file_schema;              // caller receives a copy of the cached schema
+    return Status::OK();
+}
+
+std::unique_ptr<TableColumnMapper> NativeReader::create_column_mapper(
+        TableColumnMapperOptions options) const {
+    return std::make_unique<MaterializedColumnMapper>(std::move(options)); // options moved in
+}
+
+Status NativeReader::open(std::shared_ptr<FileScanRequest> request) {
+    RETURN_IF_ERROR(FileReader::open(std::move(request)));
+    DORIS_CHECK(_request != nullptr);
+    // Rewind the per-scan flags so a block cached by the schema probe is served first.
+    // NOTE(review): `_current_offset` is not reset here; confirm open() runs once per file.
+    _first_block_consumed = _reader_eof = _eof = false;
+    return Status::OK();
+}
+
+Status NativeReader::get_block(Block* file_block, size_t* rows, bool* eof) {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    if (_request == nullptr) {
+        return Status::InternalError("Native v2 reader is not open");
+    }
+
+    *rows = 0;
+    *eof = false;
+    if (_reader_eof) {
+        *eof = true;
+        _eof = true;
+        return Status::OK();
+    }
+
+    // Read the block cached by the schema probe in place (no copy); else fetch the next PBlock.
+    const bool serve_cached = _first_block_loaded && !_first_block_consumed;
+    std::string read_buffer;
+    bool local_eof = false;
+    if (!serve_cached) {
+        RETURN_IF_ERROR(_read_next_pblock(&read_buffer, &local_eof));
+    }
+    const std::string& buffer = serve_cached ? _first_block_buffer : read_buffer;
+    if (local_eof && buffer.empty()) {
+        _reader_eof = true;
+        *eof = true;
+        _eof = true;
+        return Status::OK();
+    }
+    if (buffer.empty()) {
+        return Status::InternalError("read empty native block from file {}",
+                                     _file_description->path);
+    }
+
+    PBlock pblock;
+    RETURN_IF_ERROR(parse_native_pblock(buffer, _file_description->path, &pblock));
+    if (!_schema_inited) {
+        RETURN_IF_ERROR(_init_schema_from_pblock(pblock));
+    }
+
+    Block source_block;
+    size_t uncompressed_bytes = 0;
+    int64_t decompress_time = 0;
+    RETURN_IF_ERROR(source_block.deserialize(pblock, &uncompressed_bytes, &decompress_time));
+    RETURN_IF_ERROR(_materialize_requested_columns(source_block, file_block));
+    *rows = file_block->rows();
+    RETURN_IF_ERROR(_apply_filters(file_block, rows));
+    _reader_statistics.read_rows += *rows; // counted after filtering
+
+    if (_first_block_loaded && !_first_block_consumed) {
+        _first_block_consumed = true;
+    }
+    if (_current_offset >= _file_size) {
+        _reader_eof = true;
+    }
+    *eof = _reader_eof && *rows == 0; // a final block that still has rows is returned first
+    _eof = *eof;
+    return Status::OK();
+}
+
+Status NativeReader::close() {
+    // Also invoked from the destructor, so this can run more than once per reader.
+    _file_reader.reset();
+    _tracing_file_reader.reset();
+    _request.reset();
+    _eof = _reader_eof = true;
+    return Status::OK();
+}
+
+Status NativeReader::_validate_and_consume_header() {
+    DORIS_CHECK(_tracing_file_reader != nullptr);
+    _file_size = _tracing_file_reader->size();
+    _current_offset = 0;
+    _reader_eof = (_file_size == 0);
+
+    // The header is the magic bytes immediately followed by a uint32 format version.
+    static constexpr size_t HEADER_SIZE = sizeof(DORIS_NATIVE_MAGIC) + sizeof(uint32_t);
+    if (_reader_eof || _file_size < cast_set<int64_t>(HEADER_SIZE)) {
+        return Status::InternalError(
+                "invalid Doris Native file {}, file size {} is smaller than header size {}",
+                _file_description->path, _file_size, HEADER_SIZE);
+    }
+
+    char raw_header[HEADER_SIZE];
+    Slice raw_slice(raw_header, sizeof(raw_header));
+    size_t read_len = 0;
+    RETURN_IF_ERROR(_tracing_file_reader->read_at(0, raw_slice, &read_len, _io_ctx.get()));
+    if (read_len != sizeof(raw_header)) {
+        return Status::InternalError(
+                "failed to read Doris Native header from file {}, expect {} bytes, got {} bytes",
+                _file_description->path, sizeof(raw_header), read_len);
+    }
+    if (std::memcmp(raw_header, DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC)) != 0) {
+        return Status::InternalError("invalid Doris Native magic header in file {}",
+                                     _file_description->path);
+    }
+    uint32_t version = 0; // filled from the bytes that follow the magic
+    std::memcpy(&version, raw_header + sizeof(DORIS_NATIVE_MAGIC), sizeof(uint32_t));
+    if (version != DORIS_NATIVE_FORMAT_VERSION) {
+        return Status::InternalError(
+                "unsupported Doris Native format version {} in file {}, expect {}", version,
+                _file_description->path, DORIS_NATIVE_FORMAT_VERSION);
+    }
+
+    _current_offset = sizeof(raw_header);
+    _reader_eof = (_file_size == _current_offset);
+    return Status::OK();
+}
+
+Status NativeReader::_ensure_schema_loaded() const {
+    if (_schema_inited) {
+        return Status::OK();
+    }
+    if (!_first_block_loaded) {
+        bool reached_eof = false;
+        RETURN_IF_ERROR(_read_next_pblock(&_first_block_buffer, &reached_eof));
+        if (_first_block_buffer.empty()) {
+            // Nothing was read: end of file means an empty file, anything else a bad block.
+            return reached_eof ? Status::EndOfFile("empty native file {}", _file_description->path)
+                               : Status::InternalError("first native block is empty {}",
+                                                       _file_description->path);
+        }
+        _first_block_loaded = true;
+    }
+    // The schema is derived from the cached first block, which stays buffered afterwards.
+    PBlock first_pb;
+    RETURN_IF_ERROR(parse_native_pblock(_first_block_buffer, _file_description->path, &first_pb));
+    RETURN_IF_ERROR(_init_schema_from_pblock(first_pb));
+    return Status::OK();
+}
+
+// Reads the next length-prefixed block at _current_offset into *buffer; *eof reports file end.
+Status NativeReader::_read_next_pblock(std::string* buffer, bool* eof) const {
+    DORIS_CHECK(buffer != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    DORIS_CHECK(_tracing_file_reader != nullptr);
+    buffer->clear();
+    *eof = (_current_offset >= _file_size);
+    if (*eof) {
+        return Status::OK();
+    }
+    uint64_t block_len = 0;
+    Slice len_slice(reinterpret_cast<char*>(&block_len), sizeof(block_len));
+    size_t bytes_read = 0;
+    RETURN_IF_ERROR(
+            _tracing_file_reader->read_at(_current_offset, len_slice, &bytes_read, _io_ctx.get()));
+    if (bytes_read == 0) {
+        *eof = true;
+        return Status::OK();
+    }
+    if (bytes_read != sizeof(block_len)) {
+        return Status::InternalError(
+                "Failed to read native block length from file {}, expect {}, actual {}",
+                _file_description->path, sizeof(block_len), bytes_read);
+    }
+    _current_offset += sizeof(block_len);
+    if (block_len == 0) {
+        *eof = (_current_offset >= _file_size);
+        return Status::OK();
+    }
+    if (block_len > static_cast<uint64_t>(_file_size - _current_offset)) { // corrupt prefix
+        return Status::InternalError("invalid native block length {} at offset {} in file {}",
+                                     block_len, _current_offset, _file_description->path);
+    }
+    buffer->assign(block_len, '\0');
+    bytes_read = 0;
+    RETURN_IF_ERROR(_tracing_file_reader->read_at(
+            _current_offset, Slice(buffer->data(), block_len), &bytes_read, _io_ctx.get()));
+    if (bytes_read != block_len) {
+        return Status::InternalError(
+                "Failed to read native block body from file {}, expect {}, actual {}",
+                _file_description->path, block_len, bytes_read);
+    }
+    _current_offset += block_len;
+    *eof = (_current_offset >= _file_size);
+    return Status::OK();
+}
+
+Status NativeReader::_init_schema_from_pblock(const PBlock& pblock) const {
+    _file_schema.clear();
+    _file_schema.reserve(pblock.column_metas_size());
+    int next_local_id = 0; // position of the column meta inside the PBlock
+    for (const auto& column_meta : pblock.column_metas()) {
+        ColumnDefinition definition;
+        definition.identifier = Field::create_field<TYPE_STRING>(column_meta.name());
+        definition.local_id = next_local_id++;
+        definition.name = column_meta.name();
+        definition.type = make_nullable(DataTypeFactory::instance().create_data_type(column_meta));
+        _file_schema.push_back(std::move(definition));
+    }
+    _schema_inited = true;
+    return Status::OK();
+}
+
+Status NativeReader::_materialize_requested_columns(const Block& source_block,
+                                                    Block* file_block) const {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(_request != nullptr);
+    for (const auto& [file_column_id, block_position] : _request->local_positions) {
+        const auto src = file_column_id.value();
+        if (src < 0 || cast_set<size_t>(src) >= source_block.columns()) {
+            return Status::InternalError("native file {} does not contain local column id {}",
+                                         _file_description->path, src);
+        }
+        const auto dst = block_position.value();
+        if (dst >= file_block->columns()) {
+            return Status::InternalError("native v2 request has invalid block position {}", dst);
+        }
+        const auto& slot = file_block->get_by_position(dst);
+        auto column = source_block.get_by_position(src).column;
+        column = make_column_nullable_if_needed(std::move(column), slot.type);
+        file_block->replace_by_position(dst, IColumn::mutate(std::move(column)));
+    }
+    return Status::OK();
+}
+
+Status NativeReader::_apply_filters(Block* file_block, size_t* rows) const { // thin forwarder
+    return apply_materialized_reader_filters(_request.get(), _io_ctx.get(), file_block, rows);
+}
+
+} // namespace doris::format::native
diff --git a/be/src/format_v2/native/native_reader.h b/be/src/format_v2/native/native_reader.h
new file mode 100644
index 00000000000000..3719a6afd6c4f5
--- /dev/null
+++ b/be/src/format_v2/native/native_reader.h
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <gen_cpp/data.pb.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "format_v2/file_reader.h"
+
+namespace doris::format::native {
+
+// FileScannerV2 reader for Doris Native files.
+//
+// Native files are self-describing only through the first serialized PBlock. TableReader asks for
+// schema before open(), so this reader may read and cache that first PBlock during get_schema() and
+// then replay it as the first data batch after open().
+class NativeReader final : public FileReader {
+public:
+    NativeReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                 std::unique_ptr<io::FileDescription>& file_description,
+                 std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile);
+    ~NativeReader() override;
+
+    Status init(RuntimeState* state) override;
+    Status get_schema(std::vector<ColumnDefinition>* file_schema) const override;
+    std::unique_ptr<TableColumnMapper> create_column_mapper(
+            TableColumnMapperOptions options) const override;
+    Status open(std::shared_ptr<FileScanRequest> request) override;
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override;
+    Status close() override;
+
+private:
+    Status _validate_and_consume_header();
+    Status _ensure_schema_loaded() const;
+    Status _read_next_pblock(std::string* buffer, bool* eof) const;
+    Status _init_schema_from_pblock(const PBlock& pblock) const;
+    Status _materialize_requested_columns(const Block& source_block, Block* file_block) const;
+    Status _apply_filters(Block* file_block, size_t* rows) const;
+    // The members below are mutable because the const helpers above update them.
+    RuntimeState* _runtime_state = nullptr;
+    mutable int64_t _current_offset = 0;                // next file offset to read
+    mutable int64_t _file_size = 0;                     // size reported by the file reader
+    mutable bool _reader_eof = true;                    // no unread bytes are left
+    mutable bool _schema_inited = false;                // _file_schema has been built
+    mutable std::vector<ColumnDefinition> _file_schema; // one entry per PBlock column meta
+    mutable std::string _first_block_buffer;            // serialized first PBlock
+    mutable bool _first_block_loaded = false;           // the buffer above is filled
+    mutable bool _first_block_consumed = false;         // get_block() replayed the buffer
+};
+
+} // namespace doris::format::native
diff --git a/be/src/format_v2/parquet/parquet_column_schema.cpp b/be/src/format_v2/parquet/parquet_column_schema.cpp
new file mode 100644
index 00000000000000..b42d47987a54cb
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_column_schema.cpp
@@ -0,0 +1,492 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_column_schema.h"
+
+#include <parquet/api/schema.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "format_v2/parquet/parquet_type.h"
+
+namespace doris::format::parquet {
+namespace {
+
+struct SchemaBuildContext {
+    int32_t local_id = -1;                          // child ordinal in the parent node
+    int16_t definition_level = 0;                   // +1 per optional or repeated node so far
+    int16_t repetition_level = 0;                   // +1 per repeated node so far
+    int16_t nullable_definition_level = 0;          // definition level of the nearest optional node
+    int16_t repeated_repetition_level = 0;          // repetition level of the nearest repeated node
+    int16_t repeated_ancestor_definition_level = 0; // definition level of the nearest repeated node
+};
+
+enum class SchemaBuildMode {
+    // Normal recursive schema build. Bare repeated fields are exposed as Doris ARRAY for
+    // protobuf/legacy Parquet compatibility, while repeated LIST/MAP annotated groups are rejected
+    // because Parquet LIST/MAP outer groups are not allowed to be repeated at a top-level or struct
+    // field boundary.
+    NORMAL, // passed by build_node_schema() and for STRUCT fields and MAP key/value children
+    // Build the current repeated node as the already-selected element of an enclosing LIST. This
+    // is the compatibility path for Arrow/parquet-format legacy two-level LIST encodings where the
+    // repeated node itself is the array element instead of a wrapper that should be stripped.
+    REPEATED_NODE_AS_LIST_ELEMENT, // also used by build_repeated_field_as_list_schema()
+    // Build the current repeated group as a STRUCT element of an enclosing LIST, ignoring LIST/MAP
+    // annotations on the repeated group itself. This keeps compatibility with the old Doris
+    // Parquet schema parser for Hive/legacy wrappers named "array" or "<list_name>_tuple".
+    REPEATED_NODE_AS_STRUCT_ELEMENT, // chosen by resolve_list_element_node()
+};
+
+// Result of applying Parquet LIST backward compatibility rules to the single repeated child of a
+// LIST-annotated group. The repeated child can either be a physical wrapper whose only child is the
+// element, or the element node itself.
+struct ListElementResolution {
+    // Parquet node that should be exposed as Doris ARRAY element.
+    const ::parquet::schema::Node* element_node = nullptr;
+    // Level state after consuming the LIST repeated child. The parent ARRAY schema keeps this state
+    // to materialize offsets, empty arrays and null arrays.
+    SchemaBuildContext repeated_context;
+    // Level state used to build element_node. This equals repeated_context when the repeated child
+    // itself is the element, and includes the wrapper's only child when standard 3-level LIST
+    // encoding is stripped.
+    SchemaBuildContext element_context;
+    // Build mode for element_node. It keeps its NORMAL default when the repeated wrapper is
+    // stripped; any other mode means element_node is the repeated child itself, whose repeated
+    // level must not be interpreted as a second unrelated array at the same boundary.
+    SchemaBuildMode element_build_mode = SchemaBuildMode::NORMAL;
+};
+
+// Resolved repeated entry group of a MAP-annotated group. The entry wrapper is a physical Parquet
+// encoding detail; Doris folds it into the parent MAP schema and exposes only direct [key, value]
+// children.
+struct MapEntryResolution {
+    const ::parquet::schema::GroupNode* entry_group = nullptr; // set by resolve_map_entry_group()
+    // Level state after consuming the repeated entry group. The parent MAP schema keeps this state
+    // to materialize offsets, empty maps and null maps.
+    SchemaBuildContext entry_context;
+};
+
+bool is_list_node(const ::parquet::schema::Node& node) {
+    const auto& annot = node.logical_type();
+    const bool by_logical = annot != nullptr && annot->is_valid() && annot->is_list();
+    return by_logical || node.converted_type() == ::parquet::ConvertedType::LIST;
+}
+
+bool is_map_node(const ::parquet::schema::Node& node) {
+    const auto& annot = node.logical_type();
+    const bool by_logical = annot != nullptr && annot->is_valid() && annot->is_map();
+    return by_logical || node.converted_type() == ::parquet::ConvertedType::MAP ||
+           node.converted_type() == ::parquet::ConvertedType::MAP_KEY_VALUE;
+}
+
+bool has_logical_annotation(const ::parquet::schema::Node& node) {
+    const auto& annot = node.logical_type();
+    const bool by_logical = annot != nullptr && annot->is_valid() && !annot->is_none();
+    return by_logical || (node.converted_type() != ::parquet::ConvertedType::NONE &&
+                          node.converted_type() != ::parquet::ConvertedType::UNDEFINED);
+}
+
+bool has_structural_list_name(const std::string& list_name, const std::string& repeated_name) {
+    return repeated_name.compare("array") == 0 || repeated_name.compare(list_name + "_tuple") == 0;
+}
+
+bool should_build_repeated_field_as_list(const ::parquet::schema::Node& node) {
+    return node.is_repeated() && !(is_list_node(node) || is_map_node(node));
+}
+
+DataTypePtr nullable_if_needed(DataTypePtr type, const ::parquet::schema::Node& node) {
+    return !node.is_optional() ? type : make_nullable(type); // only OPTIONAL nodes are wrapped
+}
+
+void inherit_common_schema_state(const ::parquet::schema::Node& node,
+                                 const SchemaBuildContext& context,
+                                 ParquetColumnSchema* column_schema) {
+    DORIS_CHECK(column_schema != nullptr);
+    auto& out = *column_schema;
+    out.name = node.name();
+    out.parquet_field_id = node.field_id();
+    out.local_id = context.local_id;
+    // Max levels start at the node's own levels; propagate_child_levels() may raise them later.
+    out.max_definition_level = out.definition_level = context.definition_level;
+    out.max_repetition_level = out.repetition_level = context.repetition_level;
+    out.nullable_definition_level = context.nullable_definition_level;
+    out.repeated_repetition_level = context.repeated_repetition_level;
+    out.repeated_ancestor_definition_level = context.repeated_ancestor_definition_level;
+}
+
+// Derives child_node's level state from the parent's state and the node's repetition.
+SchemaBuildContext child_context(const SchemaBuildContext& parent,
+                                 const ::parquet::schema::Node& child_node, int32_t child_idx) {
+    SchemaBuildContext ctx = parent;
+    ctx.local_id = child_idx;
+    if (child_node.is_optional()) {
+        // One more definition level; this node is now the nearest nullable one.
+        ctx.nullable_definition_level = ++ctx.definition_level;
+    }
+    if (child_node.is_repeated()) {
+        // One more level of each kind; this node is now the nearest repeated one.
+        ctx.repeated_repetition_level = ++ctx.repetition_level;
+        ctx.repeated_ancestor_definition_level = ++ctx.definition_level;
+    }
+    return ctx;
+}
+
+void propagate_child_levels(ParquetColumnSchema* column_schema) {
+    DORIS_CHECK(column_schema != nullptr);
+    auto& max_def = column_schema->max_definition_level;
+    auto& max_rep = column_schema->max_repetition_level;
+    for (const auto& child : column_schema->children) { // fold in each child's maxima
+        max_def = std::max(max_def, child->max_definition_level);
+        max_rep = std::max(max_rep, child->max_repetition_level);
+    }
+}
+
+// Mirrors Arrow's ResolveList() compatibility rules, but only decides which Parquet node is the
+// logical LIST element. The caller still builds Doris' semantic LIST->[element] schema tree.
+// Important cases:
+// - repeated primitive: the primitive itself is the element (legacy two-level LIST).
+// - repeated group with multiple children: the group itself is a STRUCT element.
+// - repeated group named "array" or "<list_name>_tuple": the group itself is a STRUCT element per
+//   Parquet backward compatibility rules, even when it has one child or its own logical annotation.
+//   This also keeps v2 file-local schema aligned with Doris' old schema parser used by HDFS TVF.
+// - other repeated group with a logical annotation, or whose only child is repeated: the group
+//   itself is the element. This preserves nested LIST/MAP and repeated fields inside struct
+//   elements.
+// - otherwise, strip the one-child repeated wrapper as standard three-level LIST encoding.
+Status resolve_list_element_node(const ::parquet::schema::GroupNode& list_group,
+                                 const SchemaBuildContext& list_context,
+                                 ListElementResolution* result) {
+    if (result == nullptr) {
+        return Status::InvalidArgument("result is null");
+    }
+    // A LIST group must wrap exactly one child, and that child must be repeated. The second
+    // operand is only evaluated when field(0) exists.
+    if (list_group.field_count() != 1 || !list_group.field(0)->is_repeated()) {
+        return Status::NotSupported("Unsupported parquet LIST encoding for column {}",
+                                    list_group.name());
+    }
+    const auto& repeated_node = *list_group.field(0);
+    result->repeated_context = child_context(list_context, repeated_node, 0);
+
+    // Shared exit for every layout in which the repeated child itself is the LIST element: the
+    // element then reuses the repeated child's level state and only the build mode varies.
+    const auto use_repeated_node_as_element = [&](SchemaBuildMode element_mode) {
+        result->element_node = &repeated_node;
+        result->element_context = result->repeated_context;
+        result->element_build_mode = element_mode;
+        return Status::OK();
+    };
+
+    // Legacy two-level LIST: a repeated primitive is its own element.
+    if (repeated_node.is_primitive()) {
+        return use_repeated_node_as_element(SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT);
+    }
+
+    const auto& repeated_group = static_cast<const ::parquet::schema::GroupNode&>(repeated_node);
+    const int child_count = repeated_group.field_count();
+    if (child_count == 0) {
+        return Status::NotSupported("Unsupported parquet LIST element layout for column {}",
+                                    list_group.name());
+    }
+    // Several children, or one of the structural wrapper names ("array" / "<list>_tuple"), make
+    // the repeated group itself a STRUCT element.
+    if (child_count > 1 || has_structural_list_name(list_group.name(), repeated_group.name())) {
+        return use_repeated_node_as_element(SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT);
+    }
+
+    // The repeated group has exactly one child from here on. The group stays the element when
+    // it carries its own logical annotation or when that only child is itself repeated.
+    const auto& only_child = *repeated_group.field(0);
+    if (has_logical_annotation(repeated_group) || only_child.is_repeated()) {
+        return use_repeated_node_as_element(SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT);
+    }
+
+    // Standard three-level LIST: skip the one-child wrapper and expose its child as the
+    // element. element_build_mode is not assigned on this path, so it keeps whatever value the
+    // caller's ListElementResolution already holds.
+    result->element_node = &only_child;
+    result->element_context = child_context(result->repeated_context, only_child, 0);
+    return Status::OK();
+}
+
+// Resolves the repeated entry group of a MAP/MAP_KEY_VALUE node. Unlike LIST, MAP has no supported
+// two-level form in this reader: Doris requires a repeated group with exactly key and value
+// children, then folds that physical entry group out of ParquetColumnSchema. Some external writers
+// emit optional MAP keys even though standard Parquet MAP keys are required; keep the key's
+// definition levels and expose it as nullable for compatibility with the old reader.
+Status resolve_map_entry_group(const ::parquet::schema::GroupNode& map_group,
+                               const SchemaBuildContext& map_context, MapEntryResolution* result) {
+    if (result == nullptr) {
+        return Status::InvalidArgument("result is null");
+    }
+    // The MAP group must wrap exactly one child, and that child must be the repeated entry
+    // wrapper.
+    if (map_group.field_count() != 1 || !map_group.field(0)->is_repeated()) {
+        return Status::NotSupported("Unsupported parquet MAP encoding for column {}",
+                                    map_group.name());
+    }
+    const auto& entry_node = *map_group.field(0);
+    // The entry wrapper has to be a group with exactly two fields; a primitive wrapper is mapped
+    // to nullptr so that both failures share one error path.
+    const auto* entries = entry_node.is_primitive()
+                                  ? nullptr
+                                  : static_cast<const ::parquet::schema::GroupNode*>(&entry_node);
+    if (entries == nullptr || entries->field_count() != 2) {
+        return Status::NotSupported("Unsupported parquet MAP key_value layout for column {}",
+                                    map_group.name());
+    }
+
+    // The key field's repetition is not checked here. The Parquet MAP spec requires a REQUIRED
+    // key, yet some legacy/Hive-written files declare it OPTIONAL even though every stored key is
+    // non-null, for example:
+    //   optional group t_map_varchar (MAP) {
+    //     repeated group key_value {
+    //       optional binary key (STRING);
+    //       optional binary value (STRING);
+    //     }
+    //   }
+    // Such schemas are accepted so those files stay readable. MapColumnReader validates the
+    // materialized key column and rejects data that really contains null map keys.
+    result->entry_group = entries;
+    result->entry_context = child_context(map_context, entry_node, 0);
+    return Status::OK();
+}
+
+Status build_node_schema_with_mode(const ::parquet::SchemaDescriptor& schema,
+                                   const ::parquet::schema::Node& node,
+                                   const SchemaBuildContext& context,
+                                   std::unique_ptr<ParquetColumnSchema>* result,
+                                   SchemaBuildMode mode);
+
+// Builds a semantic ARRAY schema for a bare repeated field. Arrow handles this in
+// NodeToSchemaField()/GroupToSchemaField(); Doris needs the same compatibility behavior because
+// protobuf and old parquet writers often encode repeated fields without a LIST annotation.
+// Example:
+//   optional group event {
+//     repeated group links {
+//       optional binary url (UTF8);
+//       optional int32 rank;
+//     }
+//   }
+// Doris exposes event.links as ARRAY<STRUCT<url, rank>>, not STRUCT<url, rank>. This keeps v2's
+// file-local schema aligned with the old schema parser used by HDFS TVF schema fetching.
+// When the repeated field appears inside an already resolved LIST element, only the nested repeated
+// child should be wrapped:
+//   optional group a (LIST) {
+//     repeated group element {
+//       repeated int32 items;
+//     }
+//   }
+// The outer LIST element is the repeated "element" group, and its repeated "items" child should be
+// represented as a field of type ARRAY<INT> inside the struct element.
+Status build_repeated_field_as_list_schema(const ::parquet::SchemaDescriptor& schema,
+                                           const ::parquet::schema::Node& repeated_node,
+                                           const SchemaBuildContext& repeated_context,
+                                           std::unique_ptr<ParquetColumnSchema>* result) {
+    if (result == nullptr) {
+        return Status::InvalidArgument("result is null");
+    }
+    // Build the element first; the repeated node doubles as the element of the synthetic ARRAY.
+    std::unique_ptr<ParquetColumnSchema> element;
+    RETURN_IF_ERROR(build_node_schema_with_mode(schema, repeated_node, repeated_context, &element,
+                                                SchemaBuildMode::REPEATED_NODE_AS_LIST_ELEMENT));
+    element->name = "element";
+    auto array_schema = std::make_unique<ParquetColumnSchema>();
+    inherit_common_schema_state(repeated_node, repeated_context, array_schema.get());
+    array_schema->kind = ParquetColumnSchemaKind::LIST;
+    array_schema->definition_level = repeated_context.definition_level;
+    array_schema->repetition_level = repeated_context.repetition_level;
+    array_schema->repeated_repetition_level = repeated_context.repeated_repetition_level;
+    // Unlike annotated LIST groups, no nullable wrapper is applied to the ARRAY type here.
+    array_schema->type = std::make_shared<DataTypeArray>(element->type);
+    array_schema->children.push_back(std::move(element));
+    propagate_child_levels(array_schema.get());
+    *result = std::move(array_schema);
+    return Status::OK();
+}
+
+// Recursively builds ParquetColumnSchema for the given schema node and its children in Parquet
+// file's metadata. NORMAL mode exposes bare repeated fields as ARRAY for legacy compatibility.
+// REPEATED_NODE_AS_LIST_ELEMENT mode means the current repeated node was already selected as an
+// enclosing LIST element, so only its nested bare repeated children should be wrapped.
+Status build_node_schema_with_mode(const ::parquet::SchemaDescriptor& schema,
+                                   const ::parquet::schema::Node& node,
+                                   const SchemaBuildContext& context,
+                                   std::unique_ptr<ParquetColumnSchema>* result,
+                                   SchemaBuildMode mode) {
+    if (result == nullptr) {
+        return Status::InvalidArgument("result is null");
+    }
+    if (mode == SchemaBuildMode::NORMAL && should_build_repeated_field_as_list(node)) {
+        return build_repeated_field_as_list_schema(schema, node, context, result);
+    }
+    // From here on the node itself is materialized as PRIMITIVE, LIST, MAP or STRUCT.
+    auto column_schema = std::make_unique<ParquetColumnSchema>();
+    inherit_common_schema_state(node, context, column_schema.get());
+    // Leaf column: bind the leaf column descriptor and resolve the Doris type.
+    if (node.is_primitive()) {
+        const int leaf_column_id = schema.ColumnIndex(node);
+        if (leaf_column_id < 0) {
+            return Status::InvalidArgument("Cannot find leaf column id for parquet column {}",
+                                           node.name());
+        }
+        column_schema->kind = ParquetColumnSchemaKind::PRIMITIVE;
+        column_schema->leaf_column_id = leaf_column_id;
+        column_schema->descriptor = schema.Column(leaf_column_id);
+        if (column_schema->descriptor != nullptr) {
+            column_schema->max_definition_level = column_schema->descriptor->max_definition_level();
+            column_schema->max_repetition_level = column_schema->descriptor->max_repetition_level();
+        }
+        column_schema->type_descriptor = resolve_parquet_type(column_schema->descriptor);
+        column_schema->type = column_schema->type_descriptor.doris_type;
+        if (column_schema->type == nullptr) {
+            if (!column_schema->type_descriptor.unsupported_reason.empty()) {
+                return Status::NotSupported("Unsupported parquet column '{}': {}", node.name(),
+                                            column_schema->type_descriptor.unsupported_reason);
+            }
+            return Status::NotSupported("Unsupported parquet column type for column {}",
+                                        node.name());
+        }
+        column_schema->type = node.is_optional()
+                                      ? make_nullable(remove_nullable(column_schema->type))
+                                      : remove_nullable(column_schema->type);
+        *result = std::move(column_schema);
+        return Status::OK();
+    }
+    // Group node: LIST and MAP annotations are honored unless the STRUCT-element mode is on.
+    const auto& group = static_cast<const ::parquet::schema::GroupNode&>(node);
+    if (is_list_node(node) && mode != SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT) {
+        if (mode == SchemaBuildMode::NORMAL && node.is_repeated()) {
+            return Status::NotSupported("Unsupported repeated parquet LIST column {}", node.name());
+        }
+        column_schema->kind = ParquetColumnSchemaKind::LIST;
+        ListElementResolution list_element;
+        RETURN_IF_ERROR(resolve_list_element_node(group, context, &list_element));
+        column_schema->definition_level = list_element.repeated_context.definition_level;
+        column_schema->repetition_level = list_element.repeated_context.repetition_level;
+        column_schema->repeated_repetition_level =
+                list_element.repeated_context.repeated_repetition_level;
+        std::unique_ptr<ParquetColumnSchema> child;
+        RETURN_IF_ERROR(build_node_schema_with_mode(schema, *list_element.element_node,
+                                                    list_element.element_context, &child,
+                                                    list_element.element_build_mode));
+        child->name = "element";
+        column_schema->type =
+                nullable_if_needed(std::make_shared<DataTypeArray>(child->type), node);
+        column_schema->children.push_back(std::move(child));
+        propagate_child_levels(column_schema.get());
+        *result = std::move(column_schema);
+        return Status::OK();
+    }
+    // MAP: the repeated entry group is folded away; the children are exactly [key, value].
+    if (is_map_node(node) && mode != SchemaBuildMode::REPEATED_NODE_AS_STRUCT_ELEMENT) {
+        if (mode == SchemaBuildMode::NORMAL && node.is_repeated()) {
+            return Status::NotSupported("Unsupported repeated parquet MAP column {}", node.name());
+        }
+        column_schema->kind = ParquetColumnSchemaKind::MAP;
+        MapEntryResolution map_entry;
+        RETURN_IF_ERROR(resolve_map_entry_group(group, context, &map_entry));
+        column_schema->definition_level = map_entry.entry_context.definition_level;
+        column_schema->repetition_level = map_entry.entry_context.repetition_level;
+        column_schema->repeated_repetition_level =
+                map_entry.entry_context.repeated_repetition_level;
+        for (int child_idx = 0; child_idx < map_entry.entry_group->field_count(); ++child_idx) {
+            std::unique_ptr<ParquetColumnSchema> child;
+            RETURN_IF_ERROR(build_node_schema_with_mode(
+                    schema, *map_entry.entry_group->field(child_idx),
+                    child_context(map_entry.entry_context, *map_entry.entry_group->field(child_idx),
+                                  child_idx),
+                    &child, SchemaBuildMode::NORMAL));
+            child->name = child_idx == 0 ? "key" : "value";
+            column_schema->children.push_back(std::move(child));
+        }
+        if (column_schema->children.size() != 2) {
+            return Status::NotSupported("Unsupported parquet MAP key_value layout for column {}",
+                                        node.name());
+        }
+        auto key_type = make_nullable(column_schema->children[0]->type);
+        auto value_type = make_nullable(column_schema->children[1]->type);
+        column_schema->type =
+                nullable_if_needed(std::make_shared<DataTypeMap>(key_type, value_type), node);
+        propagate_child_levels(column_schema.get());
+        *result = std::move(column_schema);
+        return Status::OK();
+    }
+    // Everything else is a STRUCT whose field types are all exposed as nullable.
+    column_schema->kind = ParquetColumnSchemaKind::STRUCT;
+    DataTypes child_types;
+    Strings child_names;
+    child_types.reserve(group.field_count());
+    child_names.reserve(group.field_count());
+    for (int child_idx = 0; child_idx < group.field_count(); ++child_idx) {
+        const auto& child_node = *group.field(child_idx);
+        std::unique_ptr<ParquetColumnSchema> child;
+        const auto child_ctx = child_context(context, child_node, child_idx);
+        if (should_build_repeated_field_as_list(child_node)) {
+            RETURN_IF_ERROR(
+                    build_repeated_field_as_list_schema(schema, child_node, child_ctx, &child));
+        } else {
+            RETURN_IF_ERROR(build_node_schema_with_mode(schema, child_node, child_ctx, &child,
+                                                        SchemaBuildMode::NORMAL));
+        }
+        child_types.push_back(make_nullable(child->type));
+        child_names.push_back(child->name);
+        column_schema->children.push_back(std::move(child));
+    }
+    column_schema->type =
+            nullable_if_needed(std::make_shared<DataTypeStruct>(child_types, child_names), node);
+    propagate_child_levels(column_schema.get());
+    *result = std::move(column_schema);
+    return Status::OK();
+}
+
+Status build_node_schema(const ::parquet::SchemaDescriptor& schema,
+                         const ::parquet::schema::Node& node, const SchemaBuildContext& context,
+                         std::unique_ptr<ParquetColumnSchema>* result) { // NORMAL-mode entry
+    return build_node_schema_with_mode(schema, node, context, result, SchemaBuildMode::NORMAL);
+}
+
+} // namespace
+
+Status build_parquet_column_schema(const ::parquet::SchemaDescriptor& schema,
+                                   std::vector<std::unique_ptr<ParquetColumnSchema>>* fields) {
+    if (fields == nullptr) {
+        return Status::InvalidArgument("fields is null");
+    }
+    fields->clear();
+    const auto* root_group = schema.group_node();
+    if (root_group == nullptr) {
+        return Status::InvalidArgument("Parquet schema root is null");
+    }
+    fields->reserve(root_group->field_count());
+    const SchemaBuildContext root_context {}; // every level starts at zero below the root
+    for (int idx = 0; idx < root_group->field_count(); ++idx) {
+        const auto& top_node = *root_group->field(idx);
+        std::unique_ptr<ParquetColumnSchema> column;
+        RETURN_IF_ERROR(build_node_schema(schema, top_node,
+                                          child_context(root_context, top_node, idx), &column));
+        fields->push_back(std::move(column));
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_column_schema.h b/be/src/format_v2/parquet/parquet_column_schema.h
new file mode 100644
index 00000000000000..1fb7262aabde6f
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_column_schema.h
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/data_type/data_type.h"
+#include "format_v2/parquet/parquet_type.h"
+
+namespace parquet {
+class ColumnDescriptor;
+class SchemaDescriptor;
+} // namespace parquet
+
+namespace doris::format::parquet {
+// Structural category of a schema node; the trailing notes name the reader each kind maps to.
+enum class ParquetColumnSchemaKind {
+    PRIMITIVE, // primitive leaf -> ScalarColumnReader
+    STRUCT,    // struct -> StructColumnReader
+    LIST,      // array -> ListColumnReader
+    MAP,       // map -> MapColumnReader
+};
+
+// ============================================================================
+// ParquetColumnSchema: one node of the schema tree built by build_parquet_column_schema().
+// ============================================================================
+struct ParquetColumnSchema {
+    int local_id = -1; // NOTE(review): presumably the index among sibling nodes - confirm
+    // presumably the Parquet field_id of the node, -1 when not set - TODO confirm
+    int parquet_field_id = -1;
+    // Field name; used as the child name when a parent struct type is built.
+    std::string name;
+    // Doris data type of this node; struct nodes wrap their children's types as nullable.
+    DataTypePtr type = nullptr;
+    // presumably the leaf column index in the Parquet file, -1 for non-leaf nodes - TODO confirm
+    int leaf_column_id = -1;
+    // NOTE(review): Parquet-side type information (see parquet_type.h) - confirm contents
+    ParquetTypeDescriptor type_descriptor {};
+    // Structural kind of this node (see ParquetColumnSchemaKind).
+    ParquetColumnSchemaKind kind = ParquetColumnSchemaKind::PRIMITIVE;
+    // Arrow Parquet column descriptor; presumably only set for primitive leaves - TODO confirm
+    const ::parquet::ColumnDescriptor* descriptor = nullptr;
+
+    // ======== Dremel Levels ========
+    // NOTE(review): the meaning of each level below is only implied by its name - confirm.
+    int16_t max_definition_level = 0;
+    int16_t max_repetition_level = 0;
+
+    int16_t nullable_definition_level = 0;
+
+    int16_t definition_level = 0;
+    int16_t repetition_level = 0;
+
+    int16_t repeated_ancestor_definition_level = 0;
+
+    int16_t repeated_repetition_level = 0;
+    // Child nodes owned by this node (e.g. the fields of a struct).
+    std::vector<std::unique_ptr<ParquetColumnSchema>> children {};
+};
+// Builds one ParquetColumnSchema tree per top-level field of `schema` into `*fields`.
+Status build_parquet_column_schema(const ::parquet::SchemaDescriptor& schema,
+                                   std::vector<std::unique_ptr<ParquetColumnSchema>>* fields);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_file_context.cpp b/be/src/format_v2/parquet/parquet_file_context.cpp
new file mode 100644
index 00000000000000..dd9bf6aa9545f3
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_file_context.cpp
@@ -0,0 +1,442 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_file_context.h"
+
+#include <arrow/buffer.h>
+#include <arrow/result.h>
+#include <fmt/format.h>
+#include <gen_cpp/segment_v2.pb.h>
+#include <parquet/exception.h>
+
+#include <algorithm>
+#include <cstring>
+#include <exception>
+#include <mutex>
+#include <unordered_map>
+#include <utility>
+
+#include "common/check.h"
+#include "common/config.h"
+#include "io/file_factory.h"
+#include "io/fs/file_reader.h"
+#include "storage/cache/page_cache.h"
+#include "util/slice.h"
+
+namespace doris::format::parquet {
+
+namespace detail {
+
+std::vector<ParquetPageCacheReadPlanEntry> plan_page_cache_range_read(
+        int64_t position, int64_t nbytes, const std::vector<ParquetPageCacheRange>& cached_ranges) {
+    // Reject malformed requests up front; an empty plan tells the caller to read the file.
+    if (position < 0 || nbytes <= 0) {
+        return {};
+    }
+    const int64_t request_end = position + nbytes;
+    // Keep only the non-empty cached ranges that intersect [position, request_end).
+    std::vector<ParquetPageCacheRange> candidates;
+    candidates.reserve(cached_ranges.size());
+    for (const auto& cached : cached_ranges) {
+        if (cached.size <= 0 || cached.offset >= request_end ||
+            position >= cached.end_offset()) {
+            continue;
+        }
+        candidates.push_back(cached);
+    }
+    // Order by ascending offset; for equal offsets the larger range comes first.
+    std::sort(candidates.begin(), candidates.end(), [](const auto& a, const auto& b) {
+        return a.offset != b.offset ? a.offset < b.offset : a.size > b.size;
+    });
+
+    std::vector<ParquetPageCacheReadPlanEntry> plan;
+    for (int64_t cursor = position; cursor < request_end;) {
+        // Greedy step: among the candidates that contain the cursor, take the one reaching
+        // farthest to the right (the first such candidate wins ties). This covers adjacent
+        // and overlapping ranges alike. No candidate at the cursor means a gap, so the whole
+        // request is reported as a miss.
+        const ParquetPageCacheRange* chosen = nullptr;
+        int64_t chosen_end = cursor;
+        for (const auto& candidate : candidates) {
+            const int64_t candidate_end = candidate.end_offset();
+            const bool covers_cursor = candidate.offset <= cursor && cursor < candidate_end;
+            if (covers_cursor && candidate_end > chosen_end) {
+                chosen = &candidate;
+                chosen_end = candidate_end;
+            }
+        }
+        if (chosen == nullptr) {
+            return {};
+        }
+        ParquetPageCacheReadPlanEntry step;
+        step.cached_range = *chosen;
+        step.copy_offset_in_cache = cursor - chosen->offset;
+        step.output_offset = cursor - position;
+        step.copy_size = std::min(chosen_end, request_end) - cursor;
+        cursor += step.copy_size;
+        plan.push_back(step);
+    }
+    return plan;
+}
+
+} // namespace detail
+
+namespace {
+
+// StoragePageCache only supports exact-key lookup. This index keeps lightweight per-file range
+// metadata so later Arrow ReadAt requests can reuse cached bytes when the requested range is a
+// subset of, or fully covered by, earlier cached ranges. Stale entries are pruned on lookup.
+std::mutex cached_page_range_index_mutex; // Guards cached_page_range_index.
+std::unordered_map<std::string, std::vector<ParquetPageCacheRange>> cached_page_range_index;
+constexpr size_t MAX_CACHED_PAGE_RANGE_FILES = 4096;      // Cap on tracked file keys.
+constexpr size_t MAX_CACHED_PAGE_RANGES_PER_FILE = 65536; // Cap on tracked ranges per file key.
+
+void register_cached_page_range(const std::string& file_key, int64_t position, int64_t nbytes) {
+    DORIS_CHECK(nbytes > 0);
+    std::lock_guard lock(cached_page_range_index_mutex);
+    const bool known_file = cached_page_range_index.count(file_key) != 0;
+    if (!known_file && cached_page_range_index.size() >= MAX_CACHED_PAGE_RANGE_FILES) {
+        cached_page_range_index.erase(cached_page_range_index.begin());
+    }
+    auto& file_ranges = cached_page_range_index[file_key];
+    const auto same = [&](const auto& r) { return r.offset == position && r.size == nbytes; };
+    if (std::any_of(file_ranges.begin(), file_ranges.end(), same)) {
+        return; // Already registered.
+    }
+    // Per-file cap: forget the oldest registered range before appending the new one.
+    if (file_ranges.size() >= MAX_CACHED_PAGE_RANGES_PER_FILE) {
+        file_ranges.erase(file_ranges.begin());
+    }
+    file_ranges.push_back(ParquetPageCacheRange {position, nbytes});
+}
+
+void unregister_cached_page_range(const std::string& file_key,
+                                  const ParquetPageCacheRange& stale_range) {
+    std::lock_guard lock(cached_page_range_index_mutex);
+    const auto file_it = cached_page_range_index.find(file_key);
+    if (file_it == cached_page_range_index.end()) {
+        return; // Nothing is tracked for this file.
+    }
+    const auto is_stale = [&stale_range](const ParquetPageCacheRange& tracked) {
+        return tracked.offset == stale_range.offset && tracked.size == stale_range.size;
+    };
+    auto& tracked_ranges = file_it->second;
+    const auto new_end = std::remove_if(tracked_ranges.begin(), tracked_ranges.end(), is_stale);
+    tracked_ranges.erase(new_end, tracked_ranges.end());
+    // Remove the file entry itself once its last range is gone.
+    if (tracked_ranges.empty()) {
+        cached_page_range_index.erase(file_it);
+    }
+}
+
+std::vector<ParquetPageCacheRange> cached_page_ranges_for_file(const std::string& file_key) {
+    // Hands back a snapshot copy taken under the index mutex so callers can scan it without
+    // holding the lock; an untracked file yields an empty vector.
+    std::lock_guard lock(cached_page_range_index_mutex);
+    const auto found = cached_page_range_index.find(file_key);
+    const bool tracked = found != cached_page_range_index.end();
+    return tracked ? found->second : std::vector<ParquetPageCacheRange> {};
+}
+
+std::string build_page_cache_file_key(const io::FileReader& file_reader,
+                                      const io::FileDescription& file_description) {
+    const int64_t version_mtime =
+            file_description.mtime != 0 ? file_description.mtime : file_reader.mtime();
+    if (version_mtime == 0) {
+        // No version stamp is available. StoragePageCache is process-global, so a key built from
+        // the path alone could outlive a rewritten file; the empty key turns the cache off.
+        return {};
+    }
+    int64_t file_size = file_description.file_size;
+    if (file_size < 0) {
+        file_size = static_cast<int64_t>(file_reader.size());
+    }
+    return fmt::format("{}::{}::mtime={}::size={}", file_description.fs_name,
+                       file_reader.path().native(), version_mtime, file_size);
+}
+// Exposes a Doris io::FileReader as an arrow::io::RandomAccessFile with optional page caching.
+class DorisRandomAccessFile final : public arrow::io::RandomAccessFile {
+public:
+    DorisRandomAccessFile(io::FileReaderSPtr file_reader, io::IOContext* io_ctx,
+                          bool enable_page_cache, std::string page_cache_file_key)
+            : _file_reader(std::move(file_reader)),
+              _io_ctx(io_ctx),
+              _enable_page_cache(enable_page_cache),
+              _page_cache_file_key(std::move(page_cache_file_key)) {
+        DORIS_CHECK(_file_reader != nullptr);
+        set_mode(arrow::io::FileMode::READ);
+    }
+    // Only records the closed flag; the wrapped reader is not touched.
+    arrow::Status Close() override {
+        _closed = true;
+        return arrow::Status::OK();
+    }
+    // Reports the flag set by Close().
+    bool closed() const override { return _closed; }
+    // Current position used by the sequential Read() overloads.
+    arrow::Result<int64_t> Tell() const override { return _pos; }
+    // Moves the sequential position; only negative targets are rejected.
+    arrow::Status Seek(int64_t position) override {
+        if (position < 0) {
+            return arrow::Status::Invalid("negative seek position");
+        }
+        _pos = position;
+        return arrow::Status::OK();
+    }
+    // Size reported by the wrapped Doris reader.
+    arrow::Result<int64_t> GetSize() override {
+        if (!_file_reader) {
+            return arrow::Status::IOError("Doris file reader is not open");
+        }
+        return static_cast<int64_t>(_file_reader->size());
+    }
+    // Sequential read: delegates to ReadAt(_pos, ...) and advances _pos by the bytes returned.
+    arrow::Result<int64_t> Read(int64_t nbytes, void* out) override {
+        ARROW_ASSIGN_OR_RAISE(auto bytes_read, ReadAt(_pos, nbytes, out));
+        _pos += bytes_read;
+        return bytes_read;
+    }
+    // Buffer-returning variant; the buffer is resized to the number of bytes actually read.
+    arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override {
+        ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes));
+        ARROW_ASSIGN_OR_RAISE(auto bytes_read, Read(nbytes, buffer->mutable_data()));
+        ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
+        buffer->ZeroPadding();
+        return buffer;
+    }
+    // Positional read. Served from the page cache when possible, else read and offered to it.
+    arrow::Result<int64_t> ReadAt(int64_t position, int64_t nbytes, void* out) override {
+        if (!_file_reader) {
+            return arrow::Status::IOError("Doris file reader is not open");
+        }
+        if (position < 0 || nbytes < 0) {
+            return arrow::Status::Invalid("negative read position or length");
+        }
+        if (try_read_from_page_cache(position, nbytes, out)) {
+            return nbytes;
+        }
+        size_t bytes_read = 0;
+        Status st = _file_reader->read_at(
+                static_cast<size_t>(position),
+                Slice(static_cast<uint8_t*>(out), static_cast<size_t>(nbytes)), &bytes_read,
+                _io_ctx);
+        if (!st.ok()) {
+            return arrow::Status::IOError(st.to_string_no_stack());
+        }
+        insert_page_cache(position, nbytes, out, bytes_read);
+        return static_cast<int64_t>(bytes_read);
+    }
+    // Buffer-returning positional read; resizes the buffer to the bytes actually read.
+    arrow::Result<std::shared_ptr<arrow::Buffer>> ReadAt(int64_t position,
+                                                         int64_t nbytes) override {
+        ARROW_ASSIGN_OR_RAISE(auto buffer, arrow::AllocateResizableBuffer(nbytes));
+        ARROW_ASSIGN_OR_RAISE(auto bytes_read, ReadAt(position, nbytes, buffer->mutable_data()));
+        ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false));
+        buffer->ZeroPadding();
+        return buffer;
+    }
+    // Replaces the set of file ranges whose reads may use the page cache.
+    void register_page_cache_ranges(std::vector<ParquetPageCacheRange> ranges) {
+        std::lock_guard lock(_page_cache_mutex);
+        _page_cache_ranges = std::move(ranges);
+    }
+    // Returns a copy of the counters, taken under _page_cache_mutex.
+    ParquetPageCacheStats page_cache_stats() const {
+        std::lock_guard lock(_page_cache_mutex);
+        return _page_cache_stats;
+    }
+
+private:
+    bool page_cache_enabled() const {
+        return _enable_page_cache && !config::disable_storage_page_cache &&
+               StoragePageCache::instance() != nullptr && !_page_cache_file_key.empty();
+    }
+    // True when [position, position + nbytes) lies entirely inside one registered range.
+    bool range_in_page_cache_scope(int64_t position, int64_t nbytes) const {
+        if (nbytes <= 0) {
+            return false;
+        }
+        const int64_t end = position + nbytes;
+        for (const auto& range : _page_cache_ranges) {
+            const int64_t range_end = range.offset + range.size;
+            if (position >= range.offset && end <= range_end) {
+                return true;
+            }
+        }
+        return false;
+    }
+    // Key layout: (file key, end offset, start offset) of the exact byte range.
+    StoragePageCache::CacheKey page_cache_key(int64_t position, int64_t nbytes) const {
+        return StoragePageCache::CacheKey(_page_cache_file_key,
+                                          static_cast<size_t>(position + nbytes), position);
+    }
+    // Copies copy_size bytes of one cache entry into out; a failed lookup unregisters the range.
+    bool copy_cached_range(const ParquetPageCacheRange& cached_range, int64_t copy_position,
+                           int64_t copy_size, void* out, int64_t output_offset) {
+        PageCacheHandle handle;
+        if (!StoragePageCache::instance()->lookup(
+                    page_cache_key(cached_range.offset, cached_range.size), &handle,
+                    segment_v2::DATA_PAGE)) {
+            unregister_cached_page_range(_page_cache_file_key, cached_range);
+            return false;
+        }
+        Slice cached = handle.data();
+        const int64_t cache_offset = copy_position - cached_range.offset;
+        DORIS_CHECK(cache_offset >= 0);
+        DORIS_CHECK(cached.size >= static_cast<size_t>(cache_offset + copy_size));
+        memcpy(static_cast<uint8_t*>(out) + output_offset, cached.data + cache_offset,
+               static_cast<size_t>(copy_size));
+        return true;
+    }
+    // Fallback lookup: serves the request by copying from previously registered cached ranges.
+    bool try_read_from_cached_ranges(int64_t position, int64_t nbytes, void* out) {
+        auto plan = detail::plan_page_cache_range_read(
+                position, nbytes, cached_page_ranges_for_file(_page_cache_file_key));
+        if (plan.empty()) {
+            return false;
+        }
+        for (const auto& entry : plan) {
+            if (!copy_cached_range(entry.cached_range,
+                                   entry.cached_range.offset + entry.copy_offset_in_cache,
+                                   entry.copy_size, out, entry.output_offset)) {
+                return false;
+            }
+        }
+        return true;
+    }
+    // Cache read path for ReadAt(); updates the hit/miss counters and returns true on a hit.
+    bool try_read_from_page_cache(int64_t position, int64_t nbytes, void* out) {
+        std::lock_guard lock(_page_cache_mutex);
+        if (!page_cache_enabled() || !range_in_page_cache_scope(position, nbytes)) {
+            return false;
+        }
+        ++_page_cache_stats.read_count;
+        // Fast path: Arrow issues the same ReadAt(offset, size) again, so the exact
+        // StoragePageCache key matches.
+        // Fallback path: Arrow may read a different but related byte range on another scan.
+        // Examples:
+        // - Current request [120, 150) can be served from cached [100, 200) by copying the
+        //   30-byte subset starting at cached offset 20.
+        // - Current request [100, 260) can be served by stitching cached [100, 180) and
+        //   [180, 260). If any middle span is missing, it is a miss and the file reader fills
+        //   the whole request from storage.
+        if (!copy_cached_range(ParquetPageCacheRange {position, nbytes}, position, nbytes, out,
+                               0) &&
+            !try_read_from_cached_ranges(position, nbytes, out)) {
+            ++_page_cache_stats.miss_count;
+            return false;
+        }
+        ++_page_cache_stats.hit_count;
+        ++_page_cache_stats.compressed_hit_count;
+        return true;
+    }
+    // Caches a complete read (bytes_read == nbytes) that falls inside a registered range.
+    void insert_page_cache(int64_t position, int64_t nbytes, const void* data, size_t bytes_read) {
+        std::lock_guard lock(_page_cache_mutex);
+        if (!page_cache_enabled() || !range_in_page_cache_scope(position, nbytes) ||
+            bytes_read != static_cast<size_t>(nbytes)) {
+            return;
+        }
+        auto* page = new DataPage(bytes_read, true, segment_v2::DATA_PAGE);
+        memcpy(page->data(), data, bytes_read);
+        PageCacheHandle handle;
+        StoragePageCache::instance()->insert(page_cache_key(position, nbytes), page, &handle,
+                                             segment_v2::DATA_PAGE);
+        register_cached_page_range(_page_cache_file_key, position, nbytes);
+        ++_page_cache_stats.write_count;
+        ++_page_cache_stats.compressed_write_count;
+    }
+    // _io_ctx is a raw pointer that is only forwarded to read_at(); it is never freed here.
+    io::FileReaderSPtr _file_reader;
+    io::IOContext* _io_ctx = nullptr;
+    int64_t _pos = 0;
+    bool _closed = false;
+    bool _enable_page_cache = false;
+    std::string _page_cache_file_key;
+    mutable std::mutex _page_cache_mutex;
+    std::vector<ParquetPageCacheRange> _page_cache_ranges;
+    ParquetPageCacheStats _page_cache_stats;
+};
+
+} // namespace
+
+Status arrow_status_to_doris_status(const arrow::Status& status) {
+    if (status.ok()) {
+        return Status::OK();
+    }
+    // Arrow IOError -> IOError, Invalid -> InvalidArgument, anything else -> InternalError.
+    // The Arrow message text is carried over unchanged in every case.
+    const auto text = status.ToString();
+    if (status.IsIOError()) {
+        return Status::IOError(text);
+    }
+    return status.IsInvalid() ? Status::InvalidArgument(text) : Status::InternalError(text);
+}
+// Wraps the Doris reader for Arrow, opens the Parquet file and stores its metadata and schema.
+Status ParquetFileContext::open(io::FileReaderSPtr input_file_reader, io::IOContext* io_ctx,
+                                bool enable_page_cache,
+                                const io::FileDescription& file_description) {
+    DORIS_CHECK(input_file_reader != nullptr);
+    auto page_cache_file_key = build_page_cache_file_key(*input_file_reader, file_description);
+    arrow_file = std::make_shared<DorisRandomAccessFile>(std::move(input_file_reader), io_ctx,
+                                                         enable_page_cache,
+                                                         std::move(page_cache_file_key));
+    try {
+        // TODO: Cache parquet metadata in file system layer to avoid repeated metadata read for same file.
+        this->file_reader = ::parquet::ParquetFileReader::Open(
+                arrow_file, ::parquet::default_reader_properties());
+        metadata = this->file_reader->metadata();
+        schema = metadata != nullptr ? metadata->schema() : nullptr;
+    } catch (const ::parquet::ParquetException& e) { // Parquet errors map to Corruption.
+        return Status::Corruption("Failed to open parquet file: {}", e.what());
+    } catch (const std::exception& e) { // Any other exception maps to InternalError.
+        return Status::InternalError("Failed to open parquet file: {}", e.what());
+    }
+
+    if (metadata == nullptr || schema == nullptr) {
+        return Status::Corruption("Failed to read parquet metadata");
+    }
+    return Status::OK();
+}
+// Forwards the ranges to the Arrow adapter; arrow_file must already be set (DORIS_CHECK).
+void ParquetFileContext::register_page_cache_ranges(std::vector<ParquetPageCacheRange> ranges) {
+    DORIS_CHECK(arrow_file != nullptr);
+    static_cast<DorisRandomAccessFile*>(arrow_file.get())
+            ->register_page_cache_ranges(std::move(ranges));
+}
+
+ParquetPageCacheStats ParquetFileContext::page_cache_stats() const {
+    // With no Arrow file (never opened, or already closed) the counters are reported as
+    // zero; otherwise they are copied from the adapter.
+    const auto* adapter = static_cast<const DorisRandomAccessFile*>(arrow_file.get());
+    return adapter == nullptr ? ParquetPageCacheStats {} : adapter->page_cache_stats();
+}
+// Closes the Parquet reader and the Arrow file, then drops both; always returns OK.
+Status ParquetFileContext::close() {
+    if (file_reader != nullptr) {
+        try {
+            file_reader->Close();
+        } catch (const std::exception&) { // Errors raised while closing are ignored.
+        }
+    }
+    if (arrow_file != nullptr) {
+        static_cast<void>(arrow_status_to_doris_status(arrow_file->Close()));
+    }
+    file_reader.reset();
+    arrow_file.reset();
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_file_context.h b/be/src/format_v2/parquet/parquet_file_context.h
new file mode 100644
index 00000000000000..8dedf732c8fde0
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_file_context.h
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <arrow/io/interfaces.h>
+#include <parquet/api/reader.h>
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+#include "common/status.h"
+#include "io/fs/file_reader.h"
+
+namespace doris::io {
+struct FileDescription;
+} // namespace doris::io
+
+namespace doris::format::parquet {
+// Byte range [offset, offset + size) inside a file.
+struct ParquetPageCacheRange {
+    int64_t offset = 0;
+    int64_t size = 0;
+
+    int64_t end_offset() const { return offset + size; } // exclusive end of the range
+};
+// One copy step of a read plan produced by detail::plan_page_cache_range_read().
+struct ParquetPageCacheReadPlanEntry {
+    // The exact cached StoragePageCache entry. The final cache key is still exact-range based:
+    // file key + cached_range.end_offset() + cached_range.offset.
+    ParquetPageCacheRange cached_range;
+    // Byte offset inside cached_range to start copying from.
+    int64_t copy_offset_in_cache = 0;
+    // Byte offset inside the current ReadAt output buffer to start writing to.
+    int64_t output_offset = 0;
+    int64_t copy_size = 0; // Number of bytes to copy for this step.
+};
+// Counters kept per Arrow file adapter for reads that are eligible for the page cache.
+struct ParquetPageCacheStats {
+    int64_t read_count = 0;             // cache lookups attempted
+    int64_t write_count = 0;            // entries inserted into the cache
+    int64_t compressed_write_count = 0; // currently incremented together with write_count
+    int64_t hit_count = 0;              // lookups fully served from the cache
+    int64_t miss_count = 0;             // lookups that fell back to a file read
+    int64_t compressed_hit_count = 0;   // currently incremented together with hit_count
+};
+
+namespace detail {
+
+// Build the copy plan for a ReadAt(position, nbytes) request from the range metadata of
+// previously cached entries. An empty plan means the request is not fully covered.
+// StoragePageCache cannot do range lookup by itself; it can only look up an exact key. The
+// caller therefore keeps lightweight cached range metadata and uses this function to decide
+// which exact cache entries to fetch and which byte spans to copy.
+// Examples:
+// 1. Subset hit:
+//    request [120, 150), cached [100, 200) -> copy 30 bytes from cached offset 20.
+// 2. Superset hit covered by multiple cached entries:
+//    request [100, 260), cached [100, 180) and [180, 260)
+//    -> two copies: [100, 180) to output offset 0, [180, 260) to output offset 80.
+// 3. Partial overlap is a miss:
+//    request [100, 260), cached [100, 180) only -> empty plan, caller reads from file.
+std::vector<ParquetPageCacheReadPlanEntry> plan_page_cache_range_read(
+        int64_t position, int64_t nbytes, const std::vector<ParquetPageCacheRange>& cached_ranges);
+
+} // namespace detail
+// Holds the Arrow/Parquet objects of one opened Parquet file.
+struct ParquetFileContext {
+    std::shared_ptr<arrow::io::RandomAccessFile> arrow_file;   // Arrow wrapper for Doris FileReader
+    std::unique_ptr<::parquet::ParquetFileReader> file_reader; // Arrow Parquet file parser
+    std::shared_ptr<::parquet::FileMetaData> metadata;   // footer metadata (RowGroup information)
+    const ::parquet::SchemaDescriptor* schema = nullptr; // physical leaf column schema
+
+    Status open(io::FileReaderSPtr input_file_reader, io::IOContext* io_ctx, bool enable_page_cache,
+                const io::FileDescription& file_description);
+    // Register file ranges that belong to selected Parquet column chunks. Arrow still owns page
+    // decoding, so v2 caches the serialized bytes read inside these ranges and excludes
+    // footer/metadata reads that happen before registration.
+    void register_page_cache_ranges(std::vector<ParquetPageCacheRange> ranges);
+    ParquetPageCacheStats page_cache_stats() const; // zeroed counters when arrow_file is null
+    Status close(); // closes and releases file_reader and arrow_file; always returns OK
+};
+// Maps an Arrow status to a Doris Status (OK, IOError, InvalidArgument or InternalError).
+Status arrow_status_to_doris_status(const arrow::Status& status);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_profile.cpp b/be/src/format_v2/parquet/parquet_profile.cpp
new file mode 100644
index 00000000000000..79f979ea0cf1b8
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_profile.cpp
@@ -0,0 +1,191 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_profile.h"
+
+#include "format_v2/parquet/parquet_statistics.h"
+
+namespace doris::format::parquet {
+
+void ParquetProfile::init(RuntimeProfile* profile) {
+    if (profile == nullptr) {
+        return; // no counter is registered; NOTE(review): confirm later updates tolerate nullptr
+    }
+
+    static const char* parquet_profile = "ParquetReader";
+    ADD_TIMER_WITH_LEVEL(profile, parquet_profile, 1);
+    // NOTE(review): 3 counters below use ADD_COUNTER_WITH_LEVEL without parquet_profile - confirm.
+    filtered_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsFiltered", TUnit::UNIT,
+                                                       parquet_profile, 1);
+    filtered_row_groups_by_min_max = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "RowGroupsFilteredByMinMax", TUnit::UNIT, parquet_profile, 1);
+    filtered_row_groups_by_dictionary = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "RowGroupsFilteredByDictionary", TUnit::UNIT, parquet_profile, 1);
+    filtered_row_groups_by_bloom_filter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "RowGroupsFilteredByBloomFilter", TUnit::UNIT, parquet_profile, 1);
+    to_read_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsReadNum", TUnit::UNIT,
+                                                      parquet_profile, 1);
+    total_row_groups = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowGroupsTotalNum", TUnit::UNIT,
+                                                    parquet_profile, 1);
+    selected_row_ranges = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SelectedRowRanges", TUnit::UNIT,
+                                                       parquet_profile, 1);
+    filtered_group_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByGroup", TUnit::UNIT,
+                                                       parquet_profile, 1);
+    filtered_page_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByPage", TUnit::UNIT,
+                                                      parquet_profile, 1);
+    pages_skipped_by_data_page_filter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PagesSkippedByDataPageFilter", TUnit::UNIT, parquet_profile, 1);
+    data_page_filter_skip_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "DataPageFilterSkipBytes",
+                                                               TUnit::BYTES, parquet_profile, 1);
+    selected_rows =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SelectedRows", TUnit::UNIT, parquet_profile, 1);
+    rows_filtered_by_conjunct = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RowsFilteredByConjunct",
+                                                             TUnit::UNIT, parquet_profile, 1);
+    total_batches =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "TotalBatches", TUnit::UNIT, parquet_profile, 1);
+    empty_selection_batches = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "EmptySelectionBatches",
+                                                           TUnit::UNIT, parquet_profile, 1);
+    range_gap_skipped_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RangeGapSkippedRows",
+                                                          TUnit::UNIT, parquet_profile, 1);
+    reader_read_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderReadRows", TUnit::UNIT,
+                                                    parquet_profile, 1);
+    reader_skip_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderSkipRows", TUnit::UNIT,
+                                                    parquet_profile, 1);
+    reader_select_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ReaderSelectRows", TUnit::UNIT,
+                                                      parquet_profile, 1);
+    arrow_read_records_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "ArrowReadRecordsTime", parquet_profile, 1);
+    materialization_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "MaterializationTime", parquet_profile, 1);
+    lazy_read_filtered_rows = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredRowsByLazyRead",
+                                                           TUnit::UNIT, parquet_profile, 1);
+    filtered_bytes = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FilteredBytes", TUnit::BYTES,
+                                                  parquet_profile, 1);
+    raw_rows_read =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "RawRowsRead", TUnit::UNIT, parquet_profile, 1);
+    column_read_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ColumnReadTime", parquet_profile, 1);
+    parse_meta_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ParseMetaTime", parquet_profile, 1);
+    parse_footer_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ParseFooterTime", parquet_profile, 1);
+    file_reader_create_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "FileReaderCreateTime", parquet_profile, 1);
+    open_file_num =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "FileNum", TUnit::UNIT, parquet_profile, 1);
+    page_index_read_calls = ADD_COUNTER_WITH_LEVEL(profile, "PageIndexReadCalls", TUnit::UNIT, 1);
+    page_index_filter_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexFilterTime", parquet_profile, 1);
+    read_page_index_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexReadTime", parquet_profile, 1);
+    parse_page_index_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageIndexParseTime", parquet_profile, 1);
+    row_group_filter_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "RowGroupFilterTime", parquet_profile, 1);
+    file_footer_read_calls = ADD_COUNTER_WITH_LEVEL(profile, "FileFooterReadCalls", TUnit::UNIT, 1);
+    file_footer_hit_cache = ADD_COUNTER_WITH_LEVEL(profile, "FileFooterHitCache", TUnit::UNIT, 1);
+    decompress_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecompressTime", parquet_profile, 1);
+    decompress_cnt = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "DecompressCount", TUnit::UNIT,
+                                                  parquet_profile, 1);
+    page_read_counter =
+            ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageReadCount", TUnit::UNIT, parquet_profile, 1);
+    page_cache_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheWriteCount",
+                                                            TUnit::UNIT, parquet_profile, 1);
+    page_cache_compressed_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PageCacheCompressedWriteCount", TUnit::UNIT, parquet_profile, 1);
+    page_cache_decompressed_write_counter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PageCacheDecompressedWriteCount", TUnit::UNIT, parquet_profile, 1);
+    page_cache_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheHitCount", TUnit::UNIT,
+                                                          parquet_profile, 1);
+    page_cache_missing_counter = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "PageCacheMissingCount",
+                                                              TUnit::UNIT, parquet_profile, 1);
+    page_cache_compressed_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PageCacheCompressedHitCount", TUnit::UNIT, parquet_profile, 1);
+    page_cache_decompressed_hit_counter = ADD_CHILD_COUNTER_WITH_LEVEL(
+            profile, "PageCacheDecompressedHitCount", TUnit::UNIT, parquet_profile, 1);
+    decode_header_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageHeaderDecodeTime", parquet_profile, 1);
+    read_page_header_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PageHeaderReadTime", parquet_profile, 1);
+    decode_value_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeValueTime", parquet_profile, 1);
+    decode_dict_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeDictTime", parquet_profile, 1);
+    decode_level_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeLevelTime", parquet_profile, 1);
+    decode_null_map_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "DecodeNullMapTime", parquet_profile, 1);
+    skip_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "SkipPageHeaderNum", TUnit::UNIT,
+                                                        parquet_profile, 1);
+    parse_page_header_num = ADD_CHILD_COUNTER_WITH_LEVEL(profile, "ParsePageHeaderNum", TUnit::UNIT,
+                                                         parquet_profile, 1);
+    predicate_filter_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "PredicateFilterTime", parquet_profile, 1);
+    dict_filter_rewrite_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "DictFilterRewriteTime", parquet_profile, 1);
+    convert_time = ADD_CHILD_TIMER_WITH_LEVEL(profile, "ConvertTime", parquet_profile, 1);
+    bloom_filter_read_time =
+            ADD_CHILD_TIMER_WITH_LEVEL(profile, "BloomFilterReadTime", parquet_profile, 1);
+}
+
+void ParquetProfile::update_pruning_stats(const ParquetPruningStats& pruning_stats) const {
+    COUNTER_UPDATE(filtered_row_groups, // derived here: total minus selected row groups
+                   pruning_stats.total_row_groups - pruning_stats.selected_row_groups);
+    COUNTER_UPDATE(filtered_row_groups_by_min_max, pruning_stats.filtered_row_groups_by_statistics);
+    COUNTER_UPDATE(filtered_row_groups_by_dictionary,
+                   pruning_stats.filtered_row_groups_by_dictionary);
+    COUNTER_UPDATE(filtered_row_groups_by_bloom_filter,
+                   pruning_stats.filtered_row_groups_by_bloom_filter);
+    COUNTER_UPDATE(to_read_row_groups, pruning_stats.selected_row_groups);
+    COUNTER_UPDATE(total_row_groups, pruning_stats.total_row_groups);
+    COUNTER_UPDATE(selected_row_ranges, pruning_stats.selected_row_ranges);
+    COUNTER_UPDATE(filtered_group_rows, pruning_stats.filtered_group_rows);
+    COUNTER_UPDATE(filtered_page_rows, pruning_stats.filtered_page_rows);
+    COUNTER_UPDATE(page_index_read_calls, pruning_stats.page_index_read_calls);
+    COUNTER_UPDATE(bloom_filter_read_time, pruning_stats.bloom_filter_read_time);
+    COUNTER_UPDATE(row_group_filter_time, pruning_stats.row_group_filter_time);
+    COUNTER_UPDATE(page_index_filter_time, pruning_stats.page_index_filter_time);
+    COUNTER_UPDATE(read_page_index_time, pruning_stats.read_page_index_time);
+}
+
+ParquetPageSkipProfile ParquetProfile::page_skip_profile() const {
+    ParquetPageSkipProfile counters; // copies of this profile's page-skip counter pointers
+    counters.skipped_pages = pages_skipped_by_data_page_filter;
+    counters.skipped_bytes = data_page_filter_skip_bytes;
+    return counters;
+}
+
+ParquetColumnReaderProfile ParquetProfile::column_reader_profile() const {
+    ParquetColumnReaderProfile counters; // copies of this profile's column reader counter pointers
+    counters.reader_read_rows = reader_read_rows;
+    counters.reader_skip_rows = reader_skip_rows;
+    counters.reader_select_rows = reader_select_rows;
+    counters.arrow_read_records_time = arrow_read_records_time;
+    counters.materialization_time = materialization_time;
+    return counters;
+}
+
+ParquetScanProfile ParquetProfile::scan_profile() const {
+    ParquetScanProfile scan; // copies of this profile's scan-level counter pointers
+    scan.raw_rows_read = raw_rows_read;
+    scan.selected_rows = selected_rows;
+    scan.rows_filtered_by_conjunct = rows_filtered_by_conjunct;
+    scan.lazy_read_filtered_rows = lazy_read_filtered_rows;
+    scan.total_batches = total_batches;
+    scan.empty_selection_batches = empty_selection_batches;
+    scan.range_gap_skipped_rows = range_gap_skipped_rows;
+    scan.column_read_time = column_read_time;
+    scan.predicate_filter_time = predicate_filter_time;
+    scan.column_reader_profile = column_reader_profile();
+    return scan;
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_profile.h b/be/src/format_v2/parquet/parquet_profile.h
new file mode 100644
index 00000000000000..8f7623527ca707
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_profile.h
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "runtime/runtime_profile.h"
+
+namespace doris::format::parquet {
+
+struct ParquetPruningStats;
+
+// Pair of counters for data pages skipped by the data-page filter; filled in by
+// ParquetProfile::page_skip_profile().
+struct ParquetPageSkipProfile {
+    RuntimeProfile::Counter* skipped_pages = nullptr; // number of data pages skipped by page index
+    RuntimeProfile::Counter* skipped_bytes = nullptr; // compressed bytes skipped
+};
+
+// Row counters and timers reported by column readers; filled in by
+// ParquetProfile::column_reader_profile().
+struct ParquetColumnReaderProfile {
+    RuntimeProfile::Counter* reader_read_rows = nullptr;        // rows read by read()
+    RuntimeProfile::Counter* reader_skip_rows = nullptr;        // rows skipped by skip()
+    RuntimeProfile::Counter* reader_select_rows = nullptr;      // rows selected by select()
+    RuntimeProfile::Counter* arrow_read_records_time = nullptr; // Arrow RecordReader time (ns)
+    RuntimeProfile::Counter* materialization_time = nullptr;    // value materialization time (ns)
+};
+
+// Scan-level row/batch counters and timers plus the nested column reader counters; filled in by
+// ParquetProfile::scan_profile().
+struct ParquetScanProfile {
+    RuntimeProfile::Counter* raw_rows_read = nullptr; // raw rows read from RecordReader
+    RuntimeProfile::Counter* selected_rows = nullptr; // rows selected after conjunct filtering
+    RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr; // rows filtered by conjuncts
+    RuntimeProfile::Counter* lazy_read_filtered_rows =
+            nullptr;                                  // rows avoided by late materialization
+    RuntimeProfile::Counter* total_batches = nullptr; // total batch count
+    RuntimeProfile::Counter* empty_selection_batches =
+            nullptr;                                           // empty batches after full filtering
+    RuntimeProfile::Counter* range_gap_skipped_rows = nullptr; // rows skipped by range gaps
+    RuntimeProfile::Counter* column_read_time = nullptr;       // column read time (ns)
+    RuntimeProfile::Counter* predicate_filter_time = nullptr;  // predicate filter time (ns)
+    ParquetColumnReaderProfile column_reader_profile;          // nested column read statistics
+};
+
+// Counter pointers of the v2 Parquet reader. init() registers them on a RuntimeProfile, nearly
+// all as children of "ParquetReader"; before that every pointer is nullptr. The *_profile()
+// accessors return the pointers regrouped for page skipping, column readers and scanning.
+struct ParquetProfile {
+    void init(RuntimeProfile* profile);
+    void update_pruning_stats(const ParquetPruningStats& pruning_stats) const;
+
+    ParquetPageSkipProfile page_skip_profile() const;
+    ParquetColumnReaderProfile column_reader_profile() const;
+    ParquetScanProfile scan_profile() const;
+
+    RuntimeProfile::Counter* filtered_row_groups = nullptr;
+    RuntimeProfile::Counter* filtered_row_groups_by_min_max = nullptr;
+    RuntimeProfile::Counter* filtered_row_groups_by_dictionary = nullptr;
+    RuntimeProfile::Counter* filtered_row_groups_by_bloom_filter = nullptr;
+    RuntimeProfile::Counter* to_read_row_groups = nullptr;
+    RuntimeProfile::Counter* total_row_groups = nullptr;
+    RuntimeProfile::Counter* selected_row_ranges = nullptr;
+    RuntimeProfile::Counter* filtered_group_rows = nullptr;
+    RuntimeProfile::Counter* filtered_page_rows = nullptr;
+
+    // ======== Page Skip ========
+    RuntimeProfile::Counter* pages_skipped_by_data_page_filter = nullptr;
+    RuntimeProfile::Counter* data_page_filter_skip_bytes = nullptr;
+
+    RuntimeProfile::Counter* selected_rows = nullptr;
+    RuntimeProfile::Counter* rows_filtered_by_conjunct = nullptr;
+    RuntimeProfile::Counter* total_batches = nullptr;
+    RuntimeProfile::Counter* empty_selection_batches = nullptr;
+    RuntimeProfile::Counter* range_gap_skipped_rows = nullptr;
+
+    // ======== Column Reader ========
+    RuntimeProfile::Counter* reader_read_rows = nullptr;
+    RuntimeProfile::Counter* reader_skip_rows = nullptr;
+    RuntimeProfile::Counter* reader_select_rows = nullptr;
+    RuntimeProfile::Counter* arrow_read_records_time = nullptr;
+    RuntimeProfile::Counter* materialization_time = nullptr;
+
+    RuntimeProfile::Counter* lazy_read_filtered_rows = nullptr;
+    RuntimeProfile::Counter* filtered_bytes = nullptr;
+    RuntimeProfile::Counter* raw_rows_read = nullptr;
+    RuntimeProfile::Counter* column_read_time = nullptr;
+
+    RuntimeProfile::Counter* parse_meta_time = nullptr;
+    RuntimeProfile::Counter* parse_footer_time = nullptr;
+    RuntimeProfile::Counter* file_reader_create_time = nullptr;
+    RuntimeProfile::Counter* open_file_num = nullptr;
+    RuntimeProfile::Counter* file_footer_read_calls = nullptr;
+    RuntimeProfile::Counter* file_footer_hit_cache = nullptr;
+
+    RuntimeProfile::Counter* row_group_filter_time = nullptr;
+    RuntimeProfile::Counter* page_index_read_calls = nullptr;
+    RuntimeProfile::Counter* page_index_filter_time = nullptr;
+    RuntimeProfile::Counter* read_page_index_time = nullptr;
+    RuntimeProfile::Counter* parse_page_index_time = nullptr;
+
+    RuntimeProfile::Counter* decompress_time = nullptr;
+    RuntimeProfile::Counter* decompress_cnt = nullptr;
+    RuntimeProfile::Counter* page_read_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_write_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_compressed_write_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_decompressed_write_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_hit_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_missing_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_compressed_hit_counter = nullptr;
+    RuntimeProfile::Counter* page_cache_decompressed_hit_counter = nullptr;
+
+    RuntimeProfile::Counter* decode_header_time = nullptr;
+    RuntimeProfile::Counter* read_page_header_time = nullptr;
+    RuntimeProfile::Counter* decode_value_time = nullptr;
+    RuntimeProfile::Counter* decode_dict_time = nullptr;
+    RuntimeProfile::Counter* decode_level_time = nullptr;
+    RuntimeProfile::Counter* decode_null_map_time = nullptr;
+    RuntimeProfile::Counter* skip_page_header_num = nullptr;
+    RuntimeProfile::Counter* parse_page_header_num = nullptr;
+
+    RuntimeProfile::Counter* predicate_filter_time = nullptr;
+    RuntimeProfile::Counter* dict_filter_rewrite_time = nullptr;
+    RuntimeProfile::Counter* convert_time = nullptr;
+    RuntimeProfile::Counter* bloom_filter_read_time = nullptr;
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_reader.cpp b/be/src/format_v2/parquet/parquet_reader.cpp
new file mode 100644
index 00000000000000..24797200693020
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_reader.cpp
@@ -0,0 +1,674 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_reader.h"
+
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <optional>
+#include <ranges>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_file_context.h"
+#include "format_v2/parquet/parquet_scan.h"
+#include "format_v2/parquet/parquet_statistics.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format::parquet {
+
+struct ParquetReaderScanState { // file context, schema, scan plan, scheduler and reader options
+    ParquetFileContext file_context;
+    std::vector<std::unique_ptr<ParquetColumnSchema>> file_schema;
+    RowGroupScanPlan scan_plan;
+    ParquetScanScheduler scheduler;
+    const cctz::time_zone* timezone = nullptr;
+    bool enable_bloom_filter = false;
+    bool enable_page_cache = false;
+    bool enable_strict_mode = false;
+};
+
+int64_t column_chunk_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    return column_metadata.has_dictionary_page() // NOTE(review): offset is not sanity-checked
+                   ? cast_set<int64_t>(column_metadata.dictionary_page_offset())
+                   : cast_set<int64_t>(column_metadata.data_page_offset());
+}
+
+void collect_all_leaf_column_ids(const ParquetColumnSchema& column_schema,
+                                 std::unordered_set<int>* leaf_column_ids) {
+    DORIS_CHECK(leaf_column_ids != nullptr); // output set, only ever inserted into
+    if (column_schema.kind == ParquetColumnSchemaKind::PRIMITIVE) {
+        if (column_schema.leaf_column_id >= 0) {
+            leaf_column_ids->insert(column_schema.leaf_column_id);
+        }
+        return; // a PRIMITIVE node ends the recursion whether or not its leaf id was valid
+    }
+    for (const auto& child : column_schema.children) {
+        DORIS_CHECK(child != nullptr);
+        collect_all_leaf_column_ids(*child, leaf_column_ids);
+    }
+}
+
+void collect_projected_leaf_column_ids(const ParquetColumnSchema& column_schema,
+                                       const format::LocalColumnIndex& projection,
+                                       std::unordered_set<int>* leaf_column_ids) {
+    DORIS_CHECK(leaf_column_ids != nullptr);
+    if (projection.project_all_children || projection.children.empty()) {
+        collect_all_leaf_column_ids(column_schema, leaf_column_ids);
+        return; // no narrower child projection: take every leaf below this node
+    }
+    for (const auto& child_projection : projection.children) {
+        const auto child_it =
+                std::ranges::find_if(column_schema.children, [&](const auto& child_schema) {
+                    return child_schema->local_id == child_projection.local_id();
+                });
+        DORIS_CHECK(child_it != column_schema.children.end()); // projected child must exist
+        collect_projected_leaf_column_ids(**child_it, child_projection, leaf_column_ids);
+    }
+}
+
+void collect_request_leaf_column_ids(
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, std::unordered_set<int>* leaf_column_ids) {
+    DORIS_CHECK(leaf_column_ids != nullptr);
+    auto collect_scan_column = [&](const format::LocalColumnIndex& projection) {
+        const auto local_id = projection.local_id();
+        if (local_id == format::ROW_POSITION_COLUMN_ID ||
+            local_id == format::GLOBAL_ROWID_COLUMN_ID) {
+            return; // these two ids are skipped: no leaf is collected for them
+        }
+        DORIS_CHECK(local_id >= 0 && local_id < static_cast<int32_t>(file_schema.size()));
+        DORIS_CHECK(file_schema[local_id] != nullptr);
+        collect_projected_leaf_column_ids(*file_schema[local_id], projection, leaf_column_ids);
+    };
+    for (const auto& column : request.predicate_columns) {
+        collect_scan_column(column);
+    }
+    for (const auto& column : request.non_predicate_columns) {
+        collect_scan_column(column);
+    }
+}
+
+std::vector<ParquetPageCacheRange> build_page_cache_ranges(
+        const ::parquet::FileMetaData& metadata,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, const RowGroupScanPlan& row_group_plan) {
+    std::unordered_set<int> leaf_column_ids;
+    collect_request_leaf_column_ids(file_schema, request, &leaf_column_ids);
+    std::vector<ParquetPageCacheRange> ranges;
+    ranges.reserve(row_group_plan.row_groups.size() * leaf_column_ids.size()); // upper bound
+    for (const auto& row_group_plan_item : row_group_plan.row_groups) {
+        auto row_group_metadata = metadata.RowGroup(row_group_plan_item.row_group_id);
+        DORIS_CHECK(row_group_metadata != nullptr);
+        for (const auto leaf_column_id : leaf_column_ids) { // set iteration order is unspecified
+            DORIS_CHECK(leaf_column_id >= 0 && leaf_column_id < row_group_metadata->num_columns());
+            auto column_metadata = row_group_metadata->ColumnChunk(leaf_column_id);
+            DORIS_CHECK(column_metadata != nullptr);
+            const int64_t offset = column_chunk_start_offset(*column_metadata);
+            const int64_t size = column_metadata->total_compressed_size();
+            DORIS_CHECK(offset >= 0);
+            DORIS_CHECK(size >= 0);
+            if (size > 0) { // an empty chunk contributes no range
+                ranges.push_back(ParquetPageCacheRange {.offset = offset, .size = size});
+            }
+        }
+    }
+    return ranges;
+}
+
+const ParquetColumnSchema& projected_root_schema(
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::LocalColumnIndex& projection) {
+    const auto local_id = projection.local_id();
+    DORIS_CHECK(local_id >= 0 && local_id < static_cast<int32_t>(file_schema.size()));
+    DORIS_CHECK(file_schema[local_id] != nullptr);
+    return *file_schema[local_id]; // top-level node addressed by the projection's local id
+}
+
+int64_t count_loaded_non_null_values(const ParquetColumnSchema& root_schema,
+                                     const ParquetColumnReader& shape_reader,
+                                     int64_t expected_rows) {
+    const auto& def_levels = shape_reader.nested_definition_levels();
+    const auto& rep_levels = shape_reader.nested_repetition_levels();
+    const int64_t levels_written = shape_reader.nested_levels_written();
+    DORIS_CHECK(levels_written >= expected_rows); // at least one level slot per expected row
+    if (root_schema.max_repetition_level == 0) {
+        DORIS_CHECK(levels_written == expected_rows);
+        const int16_t non_null_definition_level = root_schema.nullable_definition_level;
+        int64_t count = 0;
+        for (int64_t level_idx = 0; level_idx < levels_written; ++level_idx) {
+            count += def_levels[level_idx] >= non_null_definition_level ? 1 : 0;
+        }
+        return count;
+    }
+
+    // For repeated encodings, a level slot starts a new top-level row when its repetition level is
+    // below root_schema.repetition_level; all other slots are skipped. Empty MAP/LIST rows have no
+    // entries but still carry a level slot; they are non-NULL and must be counted by count(col).
+    const int16_t non_null_definition_level =
+            static_cast<int16_t>(root_schema.definition_level - 1);
+    int64_t counted_rows = 0;
+    int64_t non_null_rows = 0;
+    for (int64_t level_idx = 0; level_idx < levels_written && counted_rows < expected_rows;
+         ++level_idx) {
+        if (rep_levels[level_idx] >= root_schema.repetition_level) {
+            continue;
+        }
+        ++counted_rows;
+        non_null_rows += def_levels[level_idx] >= non_null_definition_level ? 1 : 0;
+    }
+    DORIS_CHECK(counted_rows == expected_rows);
+    return non_null_rows;
+}
+
+DataTypePtr nullable_like_original(const DataTypePtr& type, DataTypePtr nested_type) {
+    return type != nullptr && type->is_nullable() ? make_nullable(nested_type) : nested_type;
+}
+
+int timestamp_tz_scale(const ParquetTypeDescriptor& type_descriptor) {
+    switch (type_descriptor.time_unit) {
+    case ParquetTimeUnit::MILLIS:
+        return 3; // milliseconds -> scale 3
+    case ParquetTimeUnit::MICROS:
+    case ParquetTimeUnit::UNKNOWN:
+    default:
+        return 6; // MICROS, UNKNOWN and every other unit -> scale 6
+    }
+}
+
+bool should_map_to_timestamp_tz(const ParquetColumnSchema& column_schema) {
+    const auto& type_descriptor = column_schema.type_descriptor;
+    return type_descriptor.physical_type == ::parquet::Type::INT96 || // INT96 always qualifies
+           (type_descriptor.is_timestamp && type_descriptor.timestamp_is_adjusted_to_utc);
+}
+
+DataTypePtr apply_timestamp_tz_mapping(ParquetColumnSchema* column_schema) {
+    DORIS_CHECK(column_schema != nullptr);
+    if (column_schema->kind == ParquetColumnSchemaKind::PRIMITIVE) {
+        if (should_map_to_timestamp_tz(*column_schema)) {
+            const bool nullable =
+                    column_schema->type != nullptr && column_schema->type->is_nullable();
+            const auto scale = timestamp_tz_scale(column_schema->type_descriptor);
+            column_schema->type = DataTypeFactory::instance().create_data_type(TYPE_TIMESTAMPTZ,
+                                                                               nullable, 0, scale);
+            column_schema->type_descriptor.doris_type = column_schema->type;
+        }
+        return column_schema->type; // leaf type, possibly replaced just above
+    }
+
+    std::vector<DataTypePtr> child_types;
+    child_types.reserve(column_schema->children.size());
+    for (auto& child : column_schema->children) {
+        child_types.push_back(apply_timestamp_tz_mapping(child.get())); // map children first
+    }
+
+    if (column_schema->kind == ParquetColumnSchemaKind::LIST) {
+        DORIS_CHECK(child_types.size() == 1);
+        column_schema->type = nullable_like_original(
+                column_schema->type, std::make_shared<DataTypeArray>(child_types[0]));
+    } else if (column_schema->kind == ParquetColumnSchemaKind::MAP) {
+        DORIS_CHECK(child_types.size() == 2);
+        column_schema->type = nullable_like_original(
+                column_schema->type, std::make_shared<DataTypeMap>(make_nullable(child_types[0]),
+                                                                   make_nullable(child_types[1])));
+    } else if (column_schema->kind == ParquetColumnSchemaKind::STRUCT) {
+        Strings child_names;
+        child_names.reserve(column_schema->children.size());
+        for (const auto& child : column_schema->children) {
+            child_names.push_back(child->name);
+        }
+        column_schema->type = nullable_like_original(
+                column_schema->type, std::make_shared<DataTypeStruct>(child_types, child_names));
+    }
+    return column_schema->type; // rebuilt above for LIST/MAP/STRUCT, untouched for other kinds
+}
+
+static Status find_projected_minmax_leaf(const ParquetColumnSchema& column_schema,
+                                         const format::LocalColumnIndex& projection,
+                                         const ParquetColumnSchema** leaf_schema) {
+    DORIS_CHECK(leaf_schema != nullptr);
+    if (projection.project_all_children || projection.children.empty()) {
+        if (column_schema.leaf_column_id < 0) {
+            return Status::NotSupported(
+                    "Parquet aggregate pushdown only supports primitive column {}",
+                    column_schema.name);
+        }
+        if (column_schema.max_repetition_level > 0) {
+            return Status::NotSupported(
+                    "Parquet aggregate pushdown does not support repeated column {}",
+                    column_schema.name);
+        }
+        *leaf_schema = &column_schema;
+        return Status::OK(); // the projection stops here, so this node is the leaf
+    }
+    if (projection.children.size() != 1) { // only a single child path is followed
+        return Status::NotSupported(
+                "Parquet aggregate pushdown only supports a single nested leaf under column {}",
+                column_schema.name);
+    }
+    const auto& child_projection = projection.children[0];
+    const auto child_schema_it =
+            std::ranges::find_if(column_schema.children, [&](const auto& child_schema) {
+                return child_schema->local_id == child_projection.local_id();
+            });
+    if (child_schema_it != column_schema.children.end()) {
+        return find_projected_minmax_leaf(**child_schema_it, child_projection, leaf_schema);
+    }
+    return Status::InvalidArgument("Invalid parquet aggregate projection local id {} for column {}",
+                                   child_projection.local_id(), column_schema.name);
+}
+
+void ParquetReader::_fill_column_definition(const ParquetColumnSchema& column_schema,
+                                            format::ColumnDefinition* field) const {
+    // Identify the column by its Parquet field id when present, otherwise by its name.
+    if (column_schema.parquet_field_id < 0) {
+        field->identifier = Field::create_field<TYPE_STRING>(column_schema.name);
+    } else {
+        field->identifier = Field::create_field<TYPE_INT>(column_schema.parquet_field_id);
+    }
+    field->local_id = column_schema.local_id;
+    field->name = column_schema.name;
+    // Expose a nullable type; a missing or already nullable type is passed through unchanged.
+    const auto& file_type = column_schema.type;
+    const bool keep_as_is = file_type == nullptr || file_type->is_nullable();
+    field->type = keep_as_is ? file_type : make_nullable(file_type);
+    field->children.clear();
+    field->children.reserve(column_schema.children.size());
+    for (const auto& child_schema : column_schema.children) {
+        _fill_column_definition(*child_schema, &field->children.emplace_back());
+    }
+}
+
+ParquetReader::ParquetReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                             std::unique_ptr<io::FileDescription>& file_description,
+                             std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                             std::optional<format::GlobalRowIdContext> global_rowid_context,
+                             bool enable_mapping_timestamp_tz) // Parquet setup happens in init()
+        : FileReader(system_properties, file_description, io_ctx, profile),
+          _global_rowid_context(global_rowid_context), // adds the global row-id column when set
+          _enable_mapping_timestamp_tz(enable_mapping_timestamp_tz) {}
+
+ParquetReader::~ParquetReader() = default; // out of line: .h forward-declares _state's type
+
+Status ParquetReader::init(RuntimeState* state) { // tolerates a null `state`
+    RETURN_IF_ERROR(format::FileReader::init(state));
+    if (_profile != nullptr) { // publish the file-open statistics collected so far
+        COUNTER_UPDATE(_parquet_profile.file_reader_create_time,
+                       _reader_statistics.file_reader_create_time);
+        COUNTER_UPDATE(_parquet_profile.open_file_num, _reader_statistics.open_file_num);
+    }
+    _state = std::make_unique<ParquetReaderScanState>(); // fresh scan state on every init()
+    _state->enable_bloom_filter =
+            state != nullptr && state->query_options().enable_parquet_filter_by_bloom_filter;
+    _state->enable_page_cache =
+            state != nullptr && state->query_options().enable_parquet_file_page_cache;
+    if (state != nullptr) { // otherwise timezone and strict mode keep the scan-state defaults
+        _state->timezone = &state->timezone_obj();
+        _state->enable_strict_mode = state->enable_strict_mode();
+        _state->scheduler.set_timezone(&state->timezone_obj());
+        _state->scheduler.set_enable_strict_mode(_state->enable_strict_mode);
+    }
+    _state->scheduler.set_batch_size(_batch_size);
+    // Open the Parquet file and parse its metadata to obtain the file schema.
+    RETURN_IF_ERROR(_state->file_context.open(_tracing_file_reader, _io_ctx.get(),
+                                              _state->enable_page_cache, *_file_description));
+    // Build the file schema from the Parquet metadata.
+    // Raw identifiers such as Parquet field_id surface through ColumnDefinition::identifier.
+    RETURN_IF_ERROR(
+            build_parquet_column_schema(*_state->file_context.schema, &_state->file_schema));
+    if (_enable_mapping_timestamp_tz) { // optionally remap timestamp columns to TIMESTAMPTZ
+        for (auto& column_schema : _state->file_schema) {
+            apply_timestamp_tz_mapping(column_schema.get());
+        }
+    }
+    return Status::OK();
+}
+
+void ParquetReader::set_batch_size(size_t batch_size) {
+    _batch_size = batch_size == 0 ? 1 : batch_size; // a batch always holds at least one row
+    if (ParquetReaderScanState* scan_state = _state.get(); scan_state != nullptr) {
+        scan_state->scheduler.set_batch_size(_batch_size);
+    }
+}
+
+Status ParquetReader::get_schema(std::vector<format::ColumnDefinition>* file_schema) const {
+    // Exports every top-level file column, plus the global row-id column when one is configured.
+    if (file_schema == nullptr) {
+        return Status::InvalidArgument("file_schema is null");
+    }
+    file_schema->clear();
+    if (_state == nullptr || _state->file_context.schema == nullptr) {
+        return Status::Uninitialized("ParquetReader is not open");
+    }
+    const auto& top_level_columns = _state->file_schema;
+    file_schema->reserve(top_level_columns.size());
+    for (const auto& column_schema : top_level_columns) {
+        auto& field = file_schema->emplace_back();
+        _fill_column_definition(*column_schema, &field);
+        DORIS_CHECK(field.local_id == static_cast<int32_t>(file_schema->size() - 1));
+    }
+    if (_global_rowid_context.has_value()) {
+        file_schema->push_back(format::global_rowid_column_definition());
+    }
+    return Status::OK();
+}
+
+std::unique_ptr<format::TableColumnMapper> ParquetReader::create_column_mapper(
+        format::TableColumnMapperOptions options) const { // returns a ParquetColumnMapper
+    return std::make_unique<format::ParquetColumnMapper>(std::move(options));
+}
+
+Status ParquetReader::open(std::shared_ptr<format::FileScanRequest> request) { // plans the scan
+    if (_state == nullptr || _state->file_context.metadata == nullptr ||
+        _state->file_context.schema == nullptr) {
+        return Status::Uninitialized("ParquetReader is not open");
+    }
+    auto request_snapshot = request; // keep a handle: `request` is moved into the base below
+    DORIS_CHECK(request_snapshot != nullptr);
+    RETURN_IF_ERROR(format::FileReader::open(std::move(request)));
+    // Every pushed-down column filter must target an existing top-level file column.
+    const int num_fields = static_cast<int>(_state->file_schema.size());
+    for (const auto& column_filter : request_snapshot->column_predicate_filters) {
+        const auto file_column_id = column_filter.effective_file_column_id();
+        if (!file_column_id.is_valid() || file_column_id.value() >= num_fields) {
+            return Status::InvalidArgument("Invalid parquet filter top-level local id {}",
+                                           file_column_id.value());
+        }
+    }
+
+    // An empty `local_positions` means the table reader needs all columns; in that case each
+    // requested column is placed at its own column id. TODO(gabriel): only TVF `select *` hits this.
+    if (request_snapshot->local_positions.empty()) {
+        for (const auto& col : request_snapshot->predicate_columns) {
+            request_snapshot->local_positions.emplace(col.column_id(),
+                                                      format::LocalIndex(col.column_id().value()));
+        }
+        for (const auto& col : request_snapshot->non_predicate_columns) {
+            request_snapshot->local_positions.emplace(col.column_id(),
+                                                      format::LocalIndex(col.column_id().value()));
+        }
+    }
+    // Each requested column needs a local position and, unless synthetic, a valid top-level id.
+    for (const auto& col : request_snapshot->predicate_columns) {
+        DORIS_CHECK(request_snapshot->local_positions.count(col.column_id()) > 0);
+        const auto local_id = col.local_id();
+        if (local_id == format::ROW_POSITION_COLUMN_ID ||
+            local_id == format::GLOBAL_ROWID_COLUMN_ID) {
+            continue;
+        }
+        DORIS_CHECK(local_id >= 0 && local_id < num_fields);
+    }
+    for (const auto& col : request_snapshot->non_predicate_columns) {
+        DORIS_CHECK(request_snapshot->local_positions.count(col.column_id()) > 0);
+        const auto local_id = col.local_id();
+        if (local_id == format::ROW_POSITION_COLUMN_ID ||
+            local_id == format::GLOBAL_ROWID_COLUMN_ID) {
+            continue;
+        }
+        DORIS_CHECK(local_id >= 0 && local_id < num_fields);
+    }
+    // Planning is restricted to the byte range described by the file description.
+    RowGroupScanPlan row_group_plan;
+    ParquetScanRange scan_range;
+    scan_range.start_offset = _file_description->range_start_offset;
+    scan_range.size = _file_description->range_size;
+    scan_range.file_size = _file_description->file_size;
+    // Select row ranges per row group from metadata (row-group statistics, bloom filter, page index).
+    RETURN_IF_ERROR(plan_parquet_row_groups(
+            *_state->file_context.metadata, _state->file_context.file_reader.get(),
+            _state->file_schema, *request_snapshot, scan_range, _state->enable_bloom_filter,
+            &row_group_plan, _state->timezone));
+    if (_profile != nullptr) {
+        _parquet_profile.update_pruning_stats(row_group_plan.pruning_stats);
+    }
+    if (_state->enable_page_cache) {
+        _state->file_context.register_page_cache_ranges(
+                build_page_cache_ranges(*_state->file_context.metadata, _state->file_schema,
+                                        *request_snapshot, row_group_plan));
+    }
+    _state->scan_plan = row_group_plan; // copy read by get_total_rows()/get_aggregate_result()
+    _state->scheduler.set_page_skip_profile(_parquet_profile.page_skip_profile());
+    _state->scheduler.set_global_rowid_context(_global_rowid_context);
+    _state->scheduler.set_scan_profile(_parquet_profile.scan_profile());
+    _state->scheduler.set_plan(std::move(row_group_plan));
+    _eof = _state->scheduler.empty();
+    return Status::OK();
+}
+
+Status ParquetReader::get_block(Block* file_block, size_t* rows, bool* eof) {
+    // Reads the next batch through the scheduler, then refreshes page-cache and filter counters.
+    if (_state == nullptr || _state->file_context.schema == nullptr ||
+        _state->file_context.file_reader == nullptr) {
+        return Status::Uninitialized("ParquetReader is not open");
+    }
+    *rows = 0;
+    if (_eof) {
+        *eof = true;
+        return Status::OK();
+    }
+    const auto active_request = _request;
+    if (active_request == nullptr) {
+        return Status::Cancelled("ParquetReader is closed");
+    }
+    auto& scheduler = _state->scheduler;
+    const auto filtered_before = scheduler.predicate_filtered_rows();
+    RETURN_IF_ERROR(scheduler.read_next_batch(_state->file_context, _state->file_schema,
+                                              *active_request, file_block, rows, eof));
+    _sync_page_cache_profile();
+    if (_io_ctx != nullptr) {
+        _io_ctx->predicate_filtered_rows += scheduler.predicate_filtered_rows() - filtered_before;
+    }
+    _eof = *eof;
+    return Status::OK();
+}
+
+void ParquetReader::_sync_page_cache_profile() {
+    if (_profile == nullptr || _state == nullptr) {
+        return;
+    }
+    // Publish only the growth of each page-cache statistic since the previous sync.
+    const auto current = _state->file_context.page_cache_stats();
+    const auto& previous = _reported_page_cache_stats;
+    COUNTER_UPDATE(_parquet_profile.page_read_counter, current.read_count - previous.read_count);
+    COUNTER_UPDATE(_parquet_profile.page_cache_write_counter,
+                   current.write_count - previous.write_count);
+    COUNTER_UPDATE(_parquet_profile.page_cache_compressed_write_counter,
+                   current.compressed_write_count - previous.compressed_write_count);
+    COUNTER_UPDATE(_parquet_profile.page_cache_hit_counter,
+                   current.hit_count - previous.hit_count);
+    COUNTER_UPDATE(_parquet_profile.page_cache_missing_counter,
+                   current.miss_count - previous.miss_count);
+    COUNTER_UPDATE(_parquet_profile.page_cache_compressed_hit_counter,
+                   current.compressed_hit_count - previous.compressed_hit_count);
+    _reported_page_cache_stats = current;
+}
+
+void ParquetReader::set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) {
+    if (_state == nullptr) {
+        return;
+    }
+    auto& scheduler = _state->scheduler;
+    scheduler.set_condition_cache_context(std::move(ctx));
+    // A condition-cache HIT drops row ranges while the scan plan is rewritten, before any batch
+    // is read, so those rows never reach get_block(); account for them here instead.
+    if (_io_ctx != nullptr) {
+        _io_ctx->condition_cache_filtered_rows += scheduler.condition_cache_filtered_rows();
+    }
+}
+
+int64_t ParquetReader::get_total_rows() const {
+    // Sums the row counts of the row groups kept by the current scan plan (0 before init()).
+    int64_t total_rows = 0;
+    if (_state != nullptr) {
+        for (const auto& planned_group : _state->scan_plan.row_groups) {
+            total_rows += planned_group.row_group_rows;
+        }
+    }
+    return total_rows;
+}
+
+Status ParquetReader::get_aggregate_result(const format::FileAggregateRequest& request,
+                                           format::FileAggregateResult* result) {
+    DORIS_CHECK(result != nullptr); // Serves COUNT and MIN/MAX pushdown from the scan plan.
+    if (_state == nullptr || _state->file_context.metadata == nullptr ||
+        _state->file_context.schema == nullptr) {
+        return Status::Uninitialized("ParquetReader is not open");
+    }
+    result->count = 0;
+    result->columns.clear();
+    if (request.agg_type != TPushAggOp::type::COUNT &&
+        request.agg_type != TPushAggOp::type::MINMAX) {
+        return Status::NotSupported("Unsupported parquet aggregate pushdown type {}",
+                                    request.agg_type);
+    }
+    // Sum the row counts of all selected row groups. COUNT without a column returns this value
+    // directly; for MIN/MAX it is used to determine whether no row group was selected.
+    for (const auto& row_group_plan : _state->scan_plan.row_groups) {
+        auto row_group_metadata =
+                _state->file_context.metadata->RowGroup(row_group_plan.row_group_id);
+        DORIS_CHECK(row_group_metadata != nullptr);
+        result->count += row_group_metadata->num_rows();
+    }
+    if (request.agg_type == TPushAggOp::type::COUNT) {
+        if (request.columns.empty()) {
+            return Status::OK();
+        }
+        if (request.columns.size() != 1) {
+            return Status::NotSupported("Parquet COUNT pushdown only supports one count column");
+        }
+        const auto& count_projection = request.columns[0].projection;
+        const auto& root_schema = projected_root_schema(_state->file_schema, count_projection);
+        result->count = 0; // COUNT(col): recount so that NULL values are excluded
+        for (const auto& row_group_plan : _state->scan_plan.row_groups) {
+            std::shared_ptr<::parquet::RowGroupReader> row_group;
+            try {
+                row_group = _state->file_context.file_reader->RowGroup(row_group_plan.row_group_id);
+            } catch (const ::parquet::ParquetException& e) {
+                return Status::Corruption("Failed to open parquet row group {}: {}",
+                                          row_group_plan.row_group_id, e.what());
+            } catch (const std::exception& e) {
+                return Status::InternalError("Failed to open parquet row group {}: {}",
+                                             row_group_plan.row_group_id, e.what());
+            }
+            // One shape reader per row group; it honours the row group's page-skip plans.
+            ParquetColumnReaderFactory column_reader_factory(
+                    row_group, _state->file_context.schema->num_columns(),
+                    &row_group_plan.page_skip_plans, _parquet_profile.page_skip_profile(),
+                    _state->timezone, _state->enable_strict_mode,
+                    _parquet_profile.scan_profile().column_reader_profile);
+            std::unique_ptr<ParquetColumnReader> shape_reader;
+            RETURN_IF_ERROR(column_reader_factory.create_count_shape_reader(
+                    root_schema, &count_projection, &shape_reader));
+            DORIS_CHECK(shape_reader != nullptr);
+
+            int64_t row_group_cursor = 0; // next row of this row group the reader will produce
+            for (const auto& selected_range : row_group_plan.selected_ranges) {
+                DORIS_CHECK(selected_range.start >= row_group_cursor);
+                RETURN_IF_ERROR(shape_reader->skip(selected_range.start - row_group_cursor));
+                row_group_cursor = selected_range.start;
+
+                int64_t range_rows_read = 0;
+                while (range_rows_read < selected_range.length) {
+                    const int64_t batch_rows =
+                            std::min<int64_t>(_batch_size, selected_range.length - range_rows_read);
+                    // COUNT(col) only needs the top-level NULL state. The shape reader loads
+                    // def/rep levels from one representative leaf and does not build value_indices
+                    // or values_column. MAP chooses the key leaf; ARRAY/STRUCT may choose a string
+                    // leaf, but the levels-only protocol still avoids Doris-side string
+                    // materialization for that leaf.
+                    RETURN_IF_ERROR(shape_reader->load_nested_levels_batch(batch_rows));
+                    result->count +=
+                            count_loaded_non_null_values(root_schema, *shape_reader, batch_rows);
+                    range_rows_read += batch_rows;
+                    row_group_cursor += batch_rows;
+                }
+            }
+        }
+        return Status::OK();
+    }
+    // MIN/MAX: merge the row-group statistics of each requested column's projected leaf.
+    result->columns.resize(request.columns.size());
+    for (size_t request_column_idx = 0; request_column_idx < request.columns.size();
+         ++request_column_idx) {
+        const auto file_column_id = request.columns[request_column_idx].projection.local_id();
+        if (file_column_id < 0 ||
+            file_column_id >= static_cast<int32_t>(_state->file_schema.size())) {
+            return Status::InvalidArgument("Invalid parquet aggregate column id {}",
+                                           file_column_id);
+        }
+        const auto& column_schema = _state->file_schema[file_column_id];
+        DORIS_CHECK(column_schema != nullptr);
+        const ParquetColumnSchema* leaf_schema = nullptr;
+        RETURN_IF_ERROR(find_projected_minmax_leaf(
+                *column_schema, request.columns[request_column_idx].projection, &leaf_schema));
+        DORIS_CHECK(leaf_schema != nullptr);
+
+        auto& aggregate_column = result->columns[request_column_idx];
+        aggregate_column.projection = request.columns[request_column_idx].projection;
+        for (const auto& row_group_plan : _state->scan_plan.row_groups) {
+            auto row_group_metadata =
+                    _state->file_context.metadata->RowGroup(row_group_plan.row_group_id);
+            DORIS_CHECK(row_group_metadata != nullptr);
+            auto column_chunk = row_group_metadata->ColumnChunk(leaf_schema->leaf_column_id);
+            DORIS_CHECK(column_chunk != nullptr);
+            const auto statistics = ParquetStatisticsUtils::TransformColumnStatistics(
+                    *leaf_schema, column_chunk->statistics(), _state->timezone);
+            if (!statistics.has_min_max) { // one chunk without min/max rejects the whole pushdown
+                return Status::NotSupported("Missing parquet min/max statistics for column {}",
+                                            leaf_schema->name);
+            }
+            if (!aggregate_column.has_min || statistics.min_value < aggregate_column.min_value) {
+                aggregate_column.min_value = statistics.min_value;
+                aggregate_column.has_min = true;
+            }
+            if (!aggregate_column.has_max || aggregate_column.max_value < statistics.max_value) {
+                aggregate_column.max_value = statistics.max_value;
+                aggregate_column.has_max = true;
+            }
+        }
+        if (!aggregate_column.has_min || !aggregate_column.has_max) {
+            return Status::NotSupported("No parquet row group selected for min/max pushdown");
+        }
+    }
+    return Status::OK();
+}
+
+Status ParquetReader::close() {
+    // Always close the base reader, even if the file context fails; the first error is returned.
+    _sync_page_cache_profile(); // no-op without a profile or scan state
+    const Status context_status = _state != nullptr ? _state->file_context.close() : Status::OK();
+    const Status base_status = FileReader::close();
+    return context_status.ok() ? base_status : context_status;
+}
+
+void ParquetReader::_init_profile() { // initializes the Parquet counter set from `_profile`
+    _parquet_profile.init(_profile);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_reader.h b/be/src/format_v2/parquet/parquet_reader.h
new file mode 100644
index 00000000000000..ff74b97a26e0e7
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_reader.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_file_context.h"
+#include "format_v2/parquet/parquet_profile.h"
+#include "format_v2/parquet/parquet_scan.h"
+
+namespace doris {
+namespace io {
+struct IOContext;
+} // namespace io
+} // namespace doris
+
+namespace doris::format::parquet {
+
+struct ParquetReaderScanState;
+
+// ============================================================================
+// ParquetReader call sequence:
+//   init() -> get_schema() -> open(request) -> get_block() [loop] -> close()
+// ============================================================================
+class ParquetReader : public format::FileReader {
+public:
+    ParquetReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                  std::unique_ptr<io::FileDescription>& file_description,
+                  std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                  std::optional<format::GlobalRowIdContext> global_rowid_context = std::nullopt,
+                  bool enable_mapping_timestamp_tz = false);
+    ~ParquetReader() override;
+    // Opens the file, parses its metadata and builds the file schema; `state` may be null.
+    Status init(RuntimeState* state) override;
+
+    void set_batch_size(size_t batch_size) override;
+    // Exports the top-level file columns, plus the global row-id column when one is configured.
+    Status get_schema(std::vector<format::ColumnDefinition>* file_schema) const override;
+
+    std::unique_ptr<format::TableColumnMapper> create_column_mapper(
+            format::TableColumnMapperOptions options) const override;
+    // Validates `request` and plans the row groups and row ranges to scan; requires init().
+    Status open(std::shared_ptr<format::FileScanRequest> request) override;
+    // Reads the next batch into `file_block`; the end of the scan is reported through `*eof`.
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override;
+    // Answers COUNT and MINMAX pushdown from the planned row groups; other types are rejected.
+    Status get_aggregate_result(const format::FileAggregateRequest& request,
+                                format::FileAggregateResult* result) override;
+    // Forwards `ctx` to the scan scheduler; ignored while no scan state exists.
+    void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) override;
+    // Total rows of the row groups kept by the current scan plan; 0 while no scan state exists.
+    int64_t get_total_rows() const override;
+    // Publishes the final page-cache counters, then closes the file context and the base reader.
+    Status close() override;
+
+protected:
+    void _init_profile() override;
+
+private:
+    void _sync_page_cache_profile();
+    // Recursively converts a Parquet column schema node into a ColumnDefinition.
+    void _fill_column_definition(const ParquetColumnSchema& column_schema,
+                                 format::ColumnDefinition* field) const;
+
+    std::unique_ptr<ParquetReaderScanState>
+            _state;                  // complete scan state (file_context + schema + scheduler)
+    ParquetProfile _parquet_profile; // RuntimeProfile counter set
+    ParquetPageCacheStats _reported_page_cache_stats; // stats already published to the profile
+    std::optional<format::GlobalRowIdContext> _global_rowid_context; // global RowId context
+    size_t _batch_size = ParquetScanScheduler::DEFAULT_READ_BATCH_SIZE; // forwarded to scheduler
+    bool _enable_mapping_timestamp_tz = false; // whether UTC timestamps are mapped to TIMESTAMPTZ
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_scan.cpp b/be/src/format_v2/parquet/parquet_scan.cpp
new file mode 100644
index 00000000000000..d636f3e3f9ee41
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_scan.cpp
@@ -0,0 +1,648 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_scan.h"
+
+#include <algorithm>
+#include <limits>
+#include <memory>
+#include <utility>
+
+#include "common/exception.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_vector.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_file_context.h"
+#include "format_v2/parquet/parquet_statistics.h"
+
+namespace doris::format::parquet {
+
+namespace {
+// First-page offset of a chunk; writers may record dictionary_page_offset = 0, which is ignored.
+int64_t column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    const auto data = cast_set<int64_t>(column_metadata.data_page_offset());
+    const auto dict = cast_set<int64_t>(column_metadata.dictionary_page_offset());
+    return column_metadata.has_dictionary_page() && dict > 0 && dict < data ? dict : data;
+}
+
+bool is_row_group_outside_range(const ::parquet::FileMetaData& metadata,
+                                const ParquetScanRange& scan_range, int row_group_idx) {
+    // A row group is kept only when the midpoint of its byte span lies inside
+    // [start_offset, start_offset + size); a negative size keeps every row group.
+    if (scan_range.size < 0) {
+        return false;
+    }
+    const int64_t split_begin = scan_range.start_offset;
+    const int64_t split_end = split_begin + scan_range.size;
+    DORIS_CHECK(split_begin >= 0);
+    DORIS_CHECK(split_end >= split_begin);
+    const bool reaches_file_end = scan_range.file_size < 0 || split_end >= scan_range.file_size;
+    if (split_begin == 0 && reaches_file_end) {
+        return false;
+    }
+    const auto row_group = metadata.RowGroup(row_group_idx);
+    DORIS_CHECK(row_group != nullptr);
+    const int column_count = row_group->num_columns();
+    DORIS_CHECK(column_count > 0);
+    const auto first_chunk = row_group->ColumnChunk(0);
+    const auto last_chunk = row_group->ColumnChunk(column_count - 1);
+    DORIS_CHECK(first_chunk != nullptr);
+    DORIS_CHECK(last_chunk != nullptr);
+    const int64_t rg_begin = column_start_offset(*first_chunk);
+    const int64_t rg_end = column_start_offset(*last_chunk) + last_chunk->total_compressed_size();
+    const int64_t midpoint = rg_begin + (rg_end - rg_begin) / 2;
+    return !(split_begin <= midpoint && midpoint < split_end);
+}
+
+} // namespace
+
+Status plan_parquet_row_groups(const ::parquet::FileMetaData& metadata,
+                               ::parquet::ParquetFileReader* file_reader,
+                               const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                               const format::FileScanRequest& request,
+                               const ParquetScanRange& scan_range, bool enable_bloom_filter,
+                               RowGroupScanPlan* plan, const cctz::time_zone* timezone) {
+    DORIS_CHECK(plan != nullptr);
+    plan->row_groups.clear(); // the plan is rebuilt from scratch on every call
+    plan->pruning_stats = ParquetPruningStats {};
+    // Pass 1: record each row group's first file row and keep the groups inside the scan range.
+    std::vector<int64_t> row_group_first_rows(metadata.num_row_groups());
+    std::vector<int> scan_range_selected_row_groups;
+    scan_range_selected_row_groups.reserve(metadata.num_row_groups());
+    int64_t next_row_group_first_row = 0;
+    for (int row_group_idx = 0; row_group_idx < metadata.num_row_groups(); ++row_group_idx) {
+        row_group_first_rows[row_group_idx] = next_row_group_first_row;
+        auto row_group_metadata = metadata.RowGroup(row_group_idx);
+        DORIS_CHECK(row_group_metadata != nullptr);
+        const int64_t row_group_rows = row_group_metadata->num_rows();
+        if (row_group_rows < 0) {
+            return Status::Corruption("Invalid negative row count in parquet row group {}",
+                                      row_group_idx);
+        }
+        next_row_group_first_row += row_group_rows; // also counts groups outside the range
+        if (!is_row_group_outside_range(metadata, scan_range, row_group_idx)) {
+            scan_range_selected_row_groups.push_back(row_group_idx);
+        }
+    }
+    // Pass 2: prune the range-selected groups via select_row_groups_by_statistics().
+    std::vector<int> statistics_selected_row_groups;
+    RETURN_IF_ERROR(select_row_groups_by_statistics(
+            metadata, file_reader, file_schema, request, &scan_range_selected_row_groups,
+            &statistics_selected_row_groups, enable_bloom_filter, &plan->pruning_stats, timezone));
+    // Pass 3: for every surviving, non-empty row group select row ranges through the page index.
+    plan->row_groups.reserve(statistics_selected_row_groups.size());
+    for (const auto row_group_idx : statistics_selected_row_groups) {
+        auto row_group_metadata = metadata.RowGroup(row_group_idx);
+        DORIS_CHECK(row_group_metadata != nullptr);
+        const int64_t row_group_rows = row_group_metadata->num_rows();
+        if (row_group_rows == 0) {
+            continue;
+        }
+
+        RowGroupReadPlan row_group_plan;
+        row_group_plan.row_group_id = row_group_idx;
+        row_group_plan.first_file_row = row_group_first_rows[row_group_idx];
+        row_group_plan.row_group_rows = row_group_rows;
+        RETURN_IF_ERROR(select_row_group_ranges_by_page_index(
+                file_reader, file_schema, request, row_group_idx, row_group_rows,
+                &row_group_plan.selected_ranges, &row_group_plan.page_skip_plans,
+                &plan->pruning_stats, timezone));
+        if (row_group_plan.selected_ranges.empty()) { // nothing left to read in this row group
+            continue;
+        }
+        plan->pruning_stats.selected_row_ranges += row_group_plan.selected_ranges.size();
+        plan->row_groups.push_back(std::move(row_group_plan));
+    }
+    plan->pruning_stats.selected_row_groups = plan->row_groups.size();
+    return Status::OK();
+}
+
+namespace {
+
+uint16_t apply_filter_to_selection(const IColumn::Filter& filter, SelectionVector* selection,
+                                   uint16_t selected_rows) {
+    uint16_t kept_rows = 0; // write cursor: survivors are compacted to the front of `selection`
+    for (uint16_t read_pos = 0; read_pos != selected_rows; ++read_pos) {
+        if (const auto row_idx = selection->get_index(read_pos); filter[row_idx] != 0) {
+            selection->set_index(kept_rows, static_cast<SelectionVector::Index>(row_idx));
+            ++kept_rows;
+        }
+    }
+    return kept_rows;
+}
+
+// Evaluates the filter conjuncts of `request` on `file_block` and narrows `selection`.
+//
+// Every conjunct gets a fresh filter of `batch_rows` bytes initialised to 1, which is handed to
+// execute_filter() and then applied to the current selection. Evaluation stops as soon as no row
+// is selected any more. `*selected_rows` is updated in place; the first failing conjunct's
+// status is returned.
+Status execute_filter_conjuncts(const format::FileScanRequest& request, int64_t batch_rows,
+                                Block* file_block, SelectionVector* selection,
+                                uint16_t* selected_rows) {
+    const auto row_count = static_cast<size_t>(batch_rows);
+    for (const auto& conjunct : request.conjuncts) {
+        if (*selected_rows == 0) {
+            return Status::OK();
+        }
+        DORIS_CHECK(conjunct != nullptr);
+
+        bool can_filter_all = false;
+        IColumn::Filter filter(row_count, 1);
+        RETURN_IF_ERROR(conjunct->execute_filter(file_block, filter.data(), row_count, false,
+                                                 &can_filter_all));
+        if (can_filter_all) {
+            // The conjunct reported that the whole batch is rejected.
+            *selected_rows = 0;
+        } else {
+            *selected_rows = apply_filter_to_selection(filter, selection, *selected_rows);
+        }
+    }
+    return Status::OK();
+}
+
+// Applies the delete conjuncts of `request`: a row for which a delete conjunct yields a non-zero
+// value is removed from `selection`.
+//
+// Each conjunct is executed on `file_block`; the result column it reports must be a ColumnUInt8
+// with exactly `batch_rows` values. That column is inverted into a keep filter and then erased
+// from the block again. Evaluation stops as soon as no row is selected any more.
+// `*selected_rows` is updated in place.
+Status execute_delete_conjuncts(const format::FileScanRequest& request, int64_t batch_rows,
+                                Block* file_block, SelectionVector* selection,
+                                uint16_t* selected_rows) {
+    const auto row_count = static_cast<size_t>(batch_rows);
+    for (const auto& delete_conjunct : request.delete_conjuncts) {
+        if (*selected_rows == 0) {
+            return Status::OK();
+        }
+        DORIS_CHECK(delete_conjunct != nullptr);
+
+        int result_column_id = -1;
+        RETURN_IF_ERROR(delete_conjunct->root()->execute(delete_conjunct.get(), file_block,
+                                                         &result_column_id));
+        DORIS_CHECK(result_column_id >= 0 &&
+                    result_column_id < static_cast<int>(file_block->columns()));
+
+        const auto& result_column = assert_cast<const ColumnUInt8&>(
+                *file_block->get_by_position(result_column_id).column);
+        const auto& delete_filter = result_column.get_data();
+        DORIS_CHECK(delete_filter.size() == row_count);
+
+        // keep = NOT delete; remember whether anything survives at all.
+        IColumn::Filter keep_filter(row_count, 1);
+        bool has_kept_row = false;
+        for (size_t row = 0; row < row_count; ++row) {
+            const bool keep = !delete_filter[row];
+            keep_filter[row] = keep;
+            has_kept_row = has_kept_row || keep;
+        }
+
+        // The result column is only a temporary; `delete_filter` must not be used after this.
+        file_block->erase(result_column_id);
+
+        if (has_kept_row) {
+            *selected_rows = apply_filter_to_selection(keep_filter, selection, *selected_rows);
+        } else {
+            *selected_rows = 0;
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+// Expands a selection into a dense filter of `batch_rows` bytes: 1 for every row index listed in
+// the first `selected_rows` entries of `selection`, 0 for all other rows.
+IColumn::Filter selection_to_filter(const SelectionVector& selection, uint16_t selected_rows,
+                                    int64_t batch_rows) {
+    IColumn::Filter row_mask(static_cast<size_t>(batch_rows), 0);
+    uint16_t pos = 0;
+    while (pos < selected_rows) {
+        row_mask[selection.get_index(pos)] = 1;
+        ++pos;
+    }
+    return row_mask;
+}
+
+// Runs the filter conjuncts and then the delete conjuncts of `request` on one batch.
+//
+// `selection` / `*selected_rows` are narrowed to the surviving rows. When
+// `conjunct_filtered_rows` is not null it is increased by the number of rows removed by the
+// filter conjuncts only; rows removed by delete conjuncts are not counted there. Delete
+// conjuncts are skipped when the filter conjuncts already rejected every row.
+Status execute_batch_filters(const format::FileScanRequest& request, int64_t batch_rows,
+                             Block* file_block, SelectionVector* selection, uint16_t* selected_rows,
+                             int64_t* conjunct_filtered_rows) {
+    const bool has_filters = !request.conjuncts.empty() || !request.delete_conjuncts.empty();
+    if (!has_filters) {
+        return Status::OK();
+    }
+
+    const int64_t rows_before = *selected_rows;
+    RETURN_IF_ERROR(
+            execute_filter_conjuncts(request, batch_rows, file_block, selection, selected_rows));
+    const int64_t rows_after = *selected_rows;
+    if (conjunct_filtered_rows != nullptr) {
+        *conjunct_filtered_rows += rows_before - rows_after;
+    }
+
+    if (rows_after != 0) {
+        return execute_delete_conjuncts(request, batch_rows, file_block, selection,
+                                        selected_rows);
+    }
+    return Status::OK();
+}
+
+namespace {
+// Returns the total number of rows covered by `ranges`, i.e. the sum of all range lengths.
+int64_t count_range_rows(const std::vector<RowRange>& ranges) {
+    int64_t total = 0;
+    for (auto it = ranges.cbegin(); it != ranges.cend(); ++it) {
+        total += it->length;
+    }
+    return total;
+}
+
+// Appends the overlap of `left` and `right` to `result`. Nothing is appended when the two
+// ranges do not share at least one row.
+void append_intersection(const RowRange& left, const RowRange& right,
+                         std::vector<RowRange>* result) {
+    const int64_t overlap_start = std::max(left.start, right.start);
+    const int64_t overlap_end = std::min(left.start + left.length, right.start + right.length);
+    if (overlap_start >= overlap_end) {
+        return;
+    }
+    result->push_back(RowRange {.start = overlap_start, .length = overlap_end - overlap_start});
+}
+
+// Removes from `ranges` the rows that a condition-cache bitmap marks as not matching.
+//
+// ranges:              selected row ranges, relative to the start of the row group.
+// cache:               granule bitmap; cache[i] describes file granule `base_granule + i`.
+// row_group_first_row: file row of the row group's first row, used to convert between
+//                      row-group-relative and file-global coordinates.
+// base_granule:        file granule index that cache[0] refers to.
+//
+// A granule is ConditionCacheContext::GRANULE_SIZE consecutive file rows. Rows in a granule
+// whose bit is false are dropped. Granules that fall outside the bitmap are kept
+// conservatively, so an undersized or old-format cache entry cannot skip valid rows. An empty
+// bitmap leaves `ranges` unchanged.
+//
+// Contiguous surviving pieces of the same input range are merged back into one RowRange, so a
+// range that is fully (or mostly) kept is not chopped into granule-sized ranges. Pieces that
+// come from different input ranges are never merged.
+std::vector<RowRange> filter_ranges_by_condition_cache(const std::vector<RowRange>& ranges,
+                                                       const std::vector<bool>& cache,
+                                                       int64_t row_group_first_row,
+                                                       int64_t base_granule) {
+    if (cache.empty()) {
+        return ranges;
+    }
+
+    const int64_t granule_size = ConditionCacheContext::GRANULE_SIZE;
+    std::vector<RowRange> result;
+    result.reserve(ranges.size());
+    for (const auto& range : ranges) {
+        // Index of the first piece produced for this input range; merging never reaches below it.
+        const size_t range_first_piece = result.size();
+        const int64_t global_start = row_group_first_row + range.start;
+        const int64_t global_end = global_start + range.length;
+        const int64_t last_granule = (global_end - 1) / granule_size;
+        for (int64_t granule = global_start / granule_size; granule <= last_granule; ++granule) {
+            const int64_t cache_idx = granule - base_granule;
+            const bool keep = cache_idx < 0 || static_cast<size_t>(cache_idx) >= cache.size() ||
+                              cache[static_cast<size_t>(cache_idx)];
+            if (!keep) {
+                continue;
+            }
+            // The granule expressed in row-group-relative coordinates.
+            const RowRange file_granule_range {
+                    .start = granule * granule_size - row_group_first_row,
+                    .length = granule_size};
+            const size_t pieces_before = result.size();
+            append_intersection(range, file_granule_range, &result);
+            if (result.size() == pieces_before || pieces_before == range_first_piece) {
+                // Nothing was appended, or this is the first piece of the current input range.
+                continue;
+            }
+            RowRange& previous = result[pieces_before - 1];
+            const RowRange& appended = result.back();
+            if (previous.start + previous.length == appended.start) {
+                // The previous granule was kept as well: extend it instead of adding a new range.
+                previous.length += appended.length;
+                result.pop_back();
+            }
+        }
+    }
+    return result;
+}
+
+} // namespace
+
+// Installs a new scan plan and rewinds the scheduler to its first row group.
+//
+// Only `plan.row_groups` is taken over; `plan.pruning_stats` is not stored by the scheduler.
+// The condition-cache and predicate filtered-row counters start again from zero.
+void ParquetScanScheduler::set_plan(RowGroupScanPlan plan) {
+    _predicate_filtered_rows = 0;
+    _condition_cache_filtered_rows = 0;
+    _row_group_plans = std::move(plan.row_groups);
+    reset();
+}
+
+// Attaches a condition-cache context to the scheduler.
+//
+// Nothing else happens when the context is null, has no filter_result bitmap, or when no row
+// group is planned. Otherwise the context's base_granule is set to the granule that contains
+// the first planned row group's first file row. On a cache hit the planned row ranges are then
+// filtered by the bitmap, the removed rows are added to the condition-cache filtered-row
+// counter, row groups left without any range are dropped, and the scheduler is rewound.
+void ParquetScanScheduler::set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) {
+    _condition_cache_ctx = std::move(ctx);
+    if (!_condition_cache_ctx || !_condition_cache_ctx->filter_result) {
+        return;
+    }
+    if (_row_group_plans.empty()) {
+        return;
+    }
+
+    _condition_cache_ctx->base_granule =
+            _row_group_plans.front().first_file_row / ConditionCacheContext::GRANULE_SIZE;
+    if (!_condition_cache_ctx->is_hit) {
+        // Cache miss: the bitmap is filled while reading, nothing to prune yet.
+        return;
+    }
+
+    // Step 1: shrink the selected ranges of every planned row group.
+    const auto& cache_bits = *_condition_cache_ctx->filter_result;
+    for (auto& plan : _row_group_plans) {
+        const int64_t rows_before = count_range_rows(plan.selected_ranges);
+        plan.selected_ranges = filter_ranges_by_condition_cache(
+                plan.selected_ranges, cache_bits, plan.first_file_row,
+                _condition_cache_ctx->base_granule);
+        _condition_cache_filtered_rows += rows_before - count_range_rows(plan.selected_ranges);
+    }
+
+    // Step 2: drop row groups that have nothing left to read, keeping the original order.
+    std::erase_if(_row_group_plans,
+                  [](const RowGroupReadPlan& plan) { return plan.selected_ranges.empty(); });
+    reset();
+}
+
+// Rewinds the scheduler: drops the state of the row group currently being read and makes the
+// first planned row group the next one to open.
+void ParquetScanScheduler::reset() {
+    reset_current_row_group();
+    _next_row_group_plan_idx = 0;
+}
+
+// Clears all per-row-group state so that read_next_batch() opens the next planned row group.
+void ParquetScanScheduler::reset_current_row_group() {
+    // Readers first, then the row group they were created from.
+    _current_predicate_columns.clear();
+    _current_non_predicate_columns.clear();
+    _current_row_group.reset();
+
+    // Row group geometry and read cursor.
+    _current_row_group_first_row = 0;
+    _current_row_group_rows = 0;
+    _current_row_group_rows_read = 0;
+
+    // Selected-range cursor.
+    _current_selected_ranges.clear();
+    _current_range_idx = 0;
+    _current_range_rows_read = 0;
+}
+
+// Opens the next planned row group and creates its column readers.
+//
+// `*has_row_group` is set to false and OK is returned when the plan is exhausted. Otherwise the
+// row group is opened through the Arrow file reader, the per-row-group cursor state is
+// initialised from the plan, and one reader is created for every predicate and non-predicate
+// column of `request`, keyed by the column's local id. Exceptions thrown while opening the row
+// group are converted into Corruption (ParquetException) or InternalError (other exceptions).
+Status ParquetScanScheduler::open_next_row_group(
+        ParquetFileContext& file_context,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, bool* has_row_group) {
+    *has_row_group = false;
+    if (_next_row_group_plan_idx >= _row_group_plans.size()) {
+        return Status::OK();
+    }
+    const RowGroupReadPlan& row_group_plan = _row_group_plans[_next_row_group_plan_idx];
+    ++_next_row_group_plan_idx;
+    const int row_group_idx = row_group_plan.row_group_id;
+    try {
+        _current_row_group = file_context.file_reader->RowGroup(row_group_idx);
+    } catch (const ::parquet::ParquetException& e) {
+        return Status::Corruption("Failed to open parquet row group {}: {}", row_group_idx,
+                                  e.what());
+    } catch (const std::exception& e) {
+        return Status::InternalError("Failed to open parquet row group {}: {}", row_group_idx,
+                                     e.what());
+    }
+
+    // The plan must still agree with the file metadata and must have something to read.
+    auto row_group_metadata = file_context.metadata->RowGroup(row_group_idx);
+    DORIS_CHECK(row_group_metadata != nullptr);
+    _current_row_group_rows = row_group_metadata->num_rows();
+    DORIS_CHECK(_current_row_group_rows == row_group_plan.row_group_rows);
+    DORIS_CHECK(_current_row_group_rows > 0);
+    DORIS_CHECK(!row_group_plan.selected_ranges.empty());
+
+    // Start reading at the beginning of the row group and of its first selected range.
+    _current_row_group_first_row = row_group_plan.first_file_row;
+    _current_row_group_rows_read = 0;
+    _current_selected_ranges = row_group_plan.selected_ranges;
+    _current_range_idx = 0;
+    _current_range_rows_read = 0;
+    _current_predicate_columns.clear();
+    _current_non_predicate_columns.clear();
+
+    ParquetColumnReaderFactory column_reader_factory(
+            _current_row_group, file_context.schema->num_columns(), &row_group_plan.page_skip_plans,
+            _page_skip_profile, _timezone, _enable_strict_mode,
+            _scan_profile.column_reader_profile);
+
+    // Creates one reader per requested column and stores it in `readers` under its local id.
+    // The row-position and global-rowid pseudo columns get dedicated readers; every other id
+    // must index into `file_schema`.
+    const auto create_readers = [&](const auto& columns, auto& readers) -> Status {
+        for (const auto& col : columns) {
+            const auto local_id = col.local_id();
+            if (local_id == format::ROW_POSITION_COLUMN_ID) {
+                readers[local_id] = column_reader_factory.create_row_position_column_reader(
+                        _current_row_group_first_row);
+            } else if (local_id == format::GLOBAL_ROWID_COLUMN_ID) {
+                DORIS_CHECK(_global_rowid_context.has_value());
+                readers[local_id] = column_reader_factory.create_global_rowid_column_reader(
+                        *_global_rowid_context, _current_row_group_first_row);
+            } else {
+                DORIS_CHECK(local_id >= 0 && local_id < static_cast<int32_t>(file_schema.size()));
+                const auto& column_schema = file_schema[local_id];
+                DORIS_CHECK(column_schema != nullptr);
+                std::unique_ptr<ParquetColumnReader> column_reader;
+                RETURN_IF_ERROR(column_reader_factory.create(*column_schema, &col, &column_reader));
+                readers[local_id] = std::move(column_reader);
+            }
+        }
+        return Status::OK();
+    };
+    RETURN_IF_ERROR(create_readers(request.predicate_columns, _current_predicate_columns));
+    RETURN_IF_ERROR(create_readers(request.non_predicate_columns, _current_non_predicate_columns));
+
+    *has_row_group = true;
+    return Status::OK();
+}
+
+// Advances every column reader of the current row group by `rows` rows without producing
+// output, and moves the row-group read cursor forward by the same amount.
+//
+// `rows` must not be negative; zero is a no-op. The skipped rows are added to the
+// range_gap_skipped_rows counter when that counter is set.
+Status ParquetScanScheduler::skip_current_row_group_rows(int64_t rows) {
+    DORIS_CHECK(rows >= 0);
+    if (rows == 0) {
+        return Status::OK();
+    }
+    if (_scan_profile.range_gap_skipped_rows != nullptr) {
+        COUNTER_UPDATE(_scan_profile.range_gap_skipped_rows, rows);
+    }
+
+    // Predicate readers are skipped before non-predicate readers; the first error aborts.
+    const auto skip_all = [rows](const auto& readers) -> Status {
+        for (const auto& [column_id, reader] : readers) {
+            RETURN_IF_ERROR(reader->skip(rows));
+        }
+        return Status::OK();
+    };
+    RETURN_IF_ERROR(skip_all(_current_predicate_columns));
+    RETURN_IF_ERROR(skip_all(_current_non_predicate_columns));
+
+    _current_row_group_rows_read += rows;
+    return Status::OK();
+}
+
+// Reads one batch of every predicate column into `file_block` and evaluates the request's
+// filter and delete conjuncts on it.
+//
+// batch_rows:             number of rows to read from each predicate column reader.
+// file_block:             receives the decoded predicate columns at the block positions given
+//                         by request.local_positions.
+// selection/selected_rows: narrowed by execute_batch_filters() to the surviving rows.
+// conjunct_filtered_rows: forwarded to execute_batch_filters().
+//
+// Returns Corruption when a reader produces a row count different from `batch_rows`.
+Status ParquetScanScheduler::read_filter_columns(int64_t batch_rows,
+                                                 const format::FileScanRequest& request,
+                                                 Block* file_block, SelectionVector* selection,
+                                                 uint16_t* selected_rows,
+                                                 int64_t* conjunct_filtered_rows) {
+    // The selection is only sized when there is at least one conjunct to evaluate.
+    if (!request.conjuncts.empty() || !request.delete_conjuncts.empty()) {
+        selection->resize(static_cast<size_t>(batch_rows));
+    }
+    for (const auto& [fid, column_reader] : _current_predicate_columns) {
+        // Map the reader's local column id to its position in the output block.
+        auto position_it = request.local_positions.find(format::LocalColumnId(fid));
+        DORIS_CHECK(position_it != request.local_positions.end());
+        const auto block_position = position_it->second.value();
+        // Debug-only: reader type and block type must agree once nullability is stripped.
+        DCHECK(remove_nullable(column_reader->type())
+                       ->equals(*remove_nullable(file_block->get_by_position(block_position).type)))
+                << column_reader->type()->get_name() << " "
+                << file_block->get_by_position(block_position).type->get_name() << " "
+                << column_reader->name() << " " << file_block->get_by_position(block_position).name;
+        auto column = file_block->get_by_position(block_position).column->assert_mutable();
+        int64_t column_rows = 0;
+        {
+            // Time only the column decode itself.
+            SCOPED_TIMER(_scan_profile.column_read_time);
+            RETURN_IF_ERROR(column_reader->read(batch_rows, column, &column_rows));
+        }
+        if (column_rows != batch_rows) {
+            return Status::Corruption("Parquet filter column {} returned {} rows, expected {} rows",
+                                      column_reader->name(), column_rows, batch_rows);
+        }
+        file_block->replace_by_position(block_position, std::move(column));
+    }
+    // Without a predicate_filter_time counter the filters run untimed; otherwise the identical
+    // call below is measured by the scoped timer.
+    if (_scan_profile.predicate_filter_time == nullptr) {
+        return execute_batch_filters(request, batch_rows, file_block, selection, selected_rows,
+                                     conjunct_filtered_rows);
+    }
+    SCOPED_TIMER(_scan_profile.predicate_filter_time);
+    return execute_batch_filters(request, batch_rows, file_block, selection, selected_rows,
+                                 conjunct_filtered_rows);
+}
+
+// Reads one physical batch of `batch_rows` rows from the current row group into `file_block`.
+//
+// Steps:
+//   1. Read the predicate columns and evaluate filter/delete conjuncts (read_filter_columns).
+//   2. Record surviving rows in the condition cache (mark_condition_cache_granules).
+//   3. If some rows were rejected, filter the predicate columns down to the selection.
+//   4. Read the non-predicate columns: only the selected rows when rows were rejected,
+//      otherwise the whole batch.
+//
+// batch_first_file_row: file row of the batch's first row, used for the condition cache.
+// rows:                 receives the number of rows that survived filtering. When no column
+//                       reader exists at all, it receives `batch_rows` and nothing is decoded.
+//
+// `batch_rows` must fit into uint16_t. Returns Corruption when a non-predicate reader produces
+// an unexpected number of rows.
+Status ParquetScanScheduler::read_current_row_group_batch(int64_t batch_rows,
+                                                          const format::FileScanRequest& request,
+                                                          int64_t batch_first_file_row,
+                                                          Block* file_block, size_t* rows) {
+    if (_scan_profile.total_batches != nullptr) {
+        COUNTER_UPDATE(_scan_profile.total_batches, 1);
+    }
+    if (_scan_profile.raw_rows_read != nullptr) {
+        COUNTER_UPDATE(_scan_profile.raw_rows_read, batch_rows);
+    }
+    // No column readers: only the row count is reported, nothing is decoded.
+    if (_current_predicate_columns.empty() && _current_non_predicate_columns.empty()) {
+        *rows = static_cast<size_t>(batch_rows);
+        if (_scan_profile.selected_rows != nullptr) {
+            COUNTER_UPDATE(_scan_profile.selected_rows, batch_rows);
+        }
+        return Status::OK();
+    }
+    SelectionVector selection;
+    // selected_rows is tracked as uint16_t, so the batch must not exceed that range.
+    DORIS_CHECK(batch_rows <= std::numeric_limits<uint16_t>::max());
+    uint16_t selected_rows = static_cast<uint16_t>(batch_rows);
+    int64_t conjunct_filtered_rows = 0;
+    RETURN_IF_ERROR(read_filter_columns(batch_rows, request, file_block, &selection, &selected_rows,
+                                        &conjunct_filtered_rows));
+    _predicate_filtered_rows += conjunct_filtered_rows;
+    mark_condition_cache_granules(selection, selected_rows, batch_first_file_row);
+
+    // Output only needs filtering when at least one row of the batch was rejected.
+    const bool need_filter_output = selected_rows != batch_rows;
+    if (_scan_profile.selected_rows != nullptr) {
+        COUNTER_UPDATE(_scan_profile.selected_rows, selected_rows);
+    }
+    if (_scan_profile.rows_filtered_by_conjunct != nullptr) {
+        COUNTER_UPDATE(_scan_profile.rows_filtered_by_conjunct, conjunct_filtered_rows);
+    }
+    // Rows rejected before the non-predicate columns are read count as lazy-read savings.
+    if (!_current_non_predicate_columns.empty() &&
+        _scan_profile.lazy_read_filtered_rows != nullptr) {
+        COUNTER_UPDATE(_scan_profile.lazy_read_filtered_rows, batch_rows - selected_rows);
+    }
+    if (selected_rows == 0 && _scan_profile.empty_selection_batches != nullptr) {
+        COUNTER_UPDATE(_scan_profile.empty_selection_batches, 1);
+    }
+    if (need_filter_output) {
+        // The predicate columns were read in full; shrink them to the selected rows.
+        IColumn::Filter output_filter = selection_to_filter(selection, selected_rows, batch_rows);
+        for (const auto& col : request.predicate_columns) {
+            auto position_it = request.local_positions.find(col.column_id());
+            DORIS_CHECK(position_it != request.local_positions.end());
+            const auto block_position = position_it->second.value();
+            RETURN_IF_CATCH_EXCEPTION(file_block->replace_by_position(
+                    block_position, file_block->get_by_position(block_position)
+                                            .column->filter(output_filter, selected_rows)));
+        }
+    }
+
+    {
+        SCOPED_TIMER(_scan_profile.column_read_time);
+        for (const auto& [fid, column_reader] : _current_non_predicate_columns) {
+            auto position_it = request.local_positions.find(format::LocalColumnId(fid));
+            DORIS_CHECK(position_it != request.local_positions.end());
+            const auto block_position = position_it->second.value();
+            auto column = file_block->get_by_position(block_position).column->assert_mutable();
+            // Debug-only: block column and reader must share the same primitive type.
+            DCHECK_EQ(file_block->get_by_position(block_position).type->get_primitive_type(),
+                      column_reader->type()->get_primitive_type())
+                    << type_to_string(file_block->get_by_position(block_position)
+                                              .type->get_primitive_type())
+                    << " " << type_to_string(column_reader->type()->get_primitive_type()) << " "
+                    << column_reader->name() << " " << fid << " " << block_position;
+            if (need_filter_output) {
+                // Append only the selected rows; the column must grow by exactly that many.
+                [[maybe_unused]] auto old_size = column->size();
+                RETURN_IF_ERROR(
+                        column_reader->select(selection, selected_rows, batch_rows, column));
+                if (column->size() != old_size + selected_rows) {
+                    return Status::Corruption(
+                            "Parquet selected output column {} returned {} rows, expected {} rows",
+                            column_reader->name(), column->size(), old_size + selected_rows);
+                }
+            } else {
+                // Every row survived: read the whole batch.
+                int64_t column_rows = 0;
+                RETURN_IF_ERROR(column_reader->read(batch_rows, column, &column_rows));
+                if (column_rows != batch_rows) {
+                    return Status::Corruption(
+                            "Parquet output column {} returned {} rows, expected {} rows",
+                            column_reader->name(), column_rows, batch_rows);
+                }
+            }
+            file_block->replace_by_position(block_position, std::move(column));
+        }
+    }
+    *rows = static_cast<size_t>(selected_rows);
+    return Status::OK();
+}
+
+// Records in the condition-cache bitmap which granules contain at least one surviving row.
+//
+// Does nothing unless a context with a filter_result bitmap is attached and the context is not
+// a cache hit. For each of the first `selected_rows` entries of `selection`, the file row
+// `batch_first_file_row + index` is mapped to its granule and the matching bit is set to true.
+// Granules that fall outside the bitmap are ignored.
+void ParquetScanScheduler::mark_condition_cache_granules(const SelectionVector& selection,
+                                                         uint16_t selected_rows,
+                                                         int64_t batch_first_file_row) {
+    if (!_condition_cache_ctx) {
+        return;
+    }
+    if (_condition_cache_ctx->is_hit || !_condition_cache_ctx->filter_result) {
+        return;
+    }
+
+    auto& granule_bits = *_condition_cache_ctx->filter_result;
+    const size_t bit_count = granule_bits.size();
+    for (uint16_t pos = 0; pos < selected_rows; ++pos) {
+        const int64_t file_row = batch_first_file_row + selection.get_index(pos);
+        const int64_t granule = file_row / ConditionCacheContext::GRANULE_SIZE;
+        const int64_t slot = granule - _condition_cache_ctx->base_granule;
+        if (slot < 0 || static_cast<size_t>(slot) >= bit_count) {
+            continue;
+        }
+        granule_bits[static_cast<size_t>(slot)] = true;
+    }
+}
+
+// Produces the next non-empty batch of the scan plan.
+//
+// Loops over the planned row groups and their selected row ranges: opens the next row group
+// when none is open, skips the rows between the read cursor and the next selected range, and
+// reads at most one batch from that range. Batches in which every row was filtered out are
+// not returned; the loop continues with the next batch instead.
+//
+// rows: receives the number of rows placed into `file_block` (0 only together with eof).
+// eof:  set to true when the plan is exhausted, false when a batch is returned.
+//
+// A physical batch never exceeds the configured batch size nor the uint16_t limit that
+// read_current_row_group_batch() requires, so a larger configured batch size is split into
+// several batches instead of failing that check.
+Status ParquetScanScheduler::read_next_batch(
+        ParquetFileContext& file_context,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, Block* file_block, size_t* rows, bool* eof) {
+    // Upper bound imposed by the uint16_t row counts used while filtering a batch.
+    constexpr int64_t max_batch_rows = std::numeric_limits<uint16_t>::max();
+
+    *rows = 0;
+    while (true) {
+        if (_current_row_group == nullptr) {
+            bool has_row_group = false;
+            RETURN_IF_ERROR(
+                    open_next_row_group(file_context, file_schema, request, &has_row_group));
+            if (!has_row_group) {
+                *eof = true;
+                return Status::OK();
+            }
+        }
+
+        if (_current_range_idx >= _current_selected_ranges.size()) {
+            // Current row group finished, try next row group.
+            reset_current_row_group();
+            continue;
+        }
+
+        const RowRange& current_range = _current_selected_ranges[_current_range_idx];
+        DORIS_CHECK(current_range.start >= 0);
+        DORIS_CHECK(current_range.length > 0);
+        DORIS_CHECK(current_range.start + current_range.length <= _current_row_group_rows);
+
+        if (_current_row_group_rows_read < current_range.start) {
+            // Skip filtered rows according to row group level pruning.
+            RETURN_IF_ERROR(skip_current_row_group_rows(current_range.start -
+                                                        _current_row_group_rows_read));
+        }
+        // The row-group cursor must now sit exactly at the unread part of the current range.
+        DORIS_CHECK(_current_row_group_rows_read == current_range.start + _current_range_rows_read);
+        const int64_t remaining_rows = current_range.length - _current_range_rows_read;
+        if (remaining_rows <= 0) {
+            // Current range finished, try next range in the same row group.
+            ++_current_range_idx;
+            _current_range_rows_read = 0;
+            continue;
+        }
+
+        const int64_t batch_rows = std::min<int64_t>(
+                std::min<int64_t>(_batch_size, remaining_rows), max_batch_rows);
+        const int64_t batch_first_file_row =
+                _current_row_group_first_row + _current_row_group_rows_read;
+        RETURN_IF_ERROR(read_current_row_group_batch(batch_rows, request, batch_first_file_row,
+                                                     file_block, rows));
+        // The cursors advance by the physical rows read, not by the rows that survived.
+        _current_row_group_rows_read += batch_rows;
+        _current_range_rows_read += batch_rows;
+        if (_current_range_rows_read >= current_range.length) {
+            ++_current_range_idx;
+            _current_range_rows_read = 0;
+        }
+        if (*rows == 0) {
+            // Everything in this batch was filtered out; keep going.
+            continue;
+        }
+        *eof = false;
+        return Status::OK();
+    }
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_scan.h b/be/src/format_v2/parquet/parquet_scan.h
new file mode 100644
index 00000000000000..ca3c665a2e95f8
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_scan.h
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <optional>
+#include <vector>
+
+#include "common/status.h"
+#include "core/column/column.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_profile.h"
+#include "format_v2/parquet/parquet_statistics.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/parquet/selection_vector.h"
+#include "runtime/runtime_profile.h"
+#include "storage/segment/condition_cache.h"
+
+namespace parquet {
+class FileMetaData;
+class ParquetFileReader;
+class RowGroupReader;
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris {
+class Block;
+
+namespace format {
+struct FileScanRequest;
+} // namespace format
+} // namespace doris
+
+namespace doris::format::parquet {
+
+struct ParquetFileContext;
+struct ParquetColumnSchema;
+
+// ============================================================================
+// Scan planning data structures
+// ============================================================================
+
+// Describes the part of a file that one scan is responsible for.
+// NOTE(review): the fields look like a byte offset/length pair of a file split; confirm
+// against plan_parquet_row_groups(), which consumes this struct.
+struct ParquetScanRange {
+    int64_t start_offset = 0;
+    int64_t size = -1;      // -1 means read the whole file
+    int64_t file_size = -1; // -1 means unknown
+};
+
+// Read plan for a single row group that survived pruning.
+// `selected_ranges` use row-group-relative row numbers; adding `first_file_row` yields file
+// rows. A plan handed to the scheduler is expected to have at least one selected range.
+struct RowGroupReadPlan {
+    int row_group_id = -1;                 // row group id
+    int64_t first_file_row = 0;            // first file row for this row group (0-based)
+    int64_t row_group_rows = 0;            // row count of this row group
+    std::vector<RowRange> selected_ranges; // row ranges to read after page-index pruning
+    std::map<int, ParquetPageSkipPlan>
+            page_skip_plans; // leaf_column_id -> data pages that can be skipped completely
+};
+
+// Result of planning a scan: the row groups to read, in read order, together with the
+// statistics collected while pruning. ParquetScanScheduler::set_plan() takes over `row_groups`
+// only.
+struct RowGroupScanPlan {
+    std::vector<RowGroupReadPlan> row_groups; // row groups selected after pruning
+    ParquetPruningStats pruning_stats;        // pruning statistics
+};
+
+// ============================================================================
+// Planning and batch filtering helpers
+// ============================================================================
+
+// Builds the row-group read plan for one scan of a Parquet file.
+//
+// Row groups are selected by statistics (optionally with bloom filters, see
+// `enable_bloom_filter`) and then narrowed to row ranges through the page index. Row groups
+// with zero rows, or with no selected range left, are not part of the plan. `plan->row_groups`
+// and `plan->pruning_stats` are filled in.
+// NOTE(review): `scan_range` presumably restricts planning to the row groups that belong to
+// this split; `timezone` is forwarded to the statistics/page-index evaluation.
+Status plan_parquet_row_groups(const ::parquet::FileMetaData& metadata,
+                               ::parquet::ParquetFileReader* file_reader,
+                               const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                               const format::FileScanRequest& request,
+                               const ParquetScanRange& scan_range, bool enable_bloom_filter,
+                               RowGroupScanPlan* plan, const cctz::time_zone* timezone = nullptr);
+
+// Converts a selection into a dense filter of `batch_rows` bytes: 1 for each row index listed
+// in the first `selected_rows` entries of `selection`, 0 elsewhere.
+IColumn::Filter selection_to_filter(const SelectionVector& selection, uint16_t selected_rows,
+                                    int64_t batch_rows);
+
+// Evaluates the request's filter conjuncts and then its delete conjuncts on `file_block`,
+// narrowing `selection` / `*selected_rows` to the surviving rows. When
+// `conjunct_filtered_rows` is not null, it is increased by the number of rows removed by the
+// filter conjuncts (delete conjuncts are not counted).
+Status execute_batch_filters(const format::FileScanRequest& request, int64_t batch_rows,
+                             Block* file_block, SelectionVector* selection, uint16_t* selected_rows,
+                             int64_t* conjunct_filtered_rows = nullptr);
+
+// ============================================================================
+// ParquetScanScheduler: reads the planned row groups batch by batch.
+// ============================================================================
+// read_next_batch() drives the scan:
+//   while true:
+//     1. open_next_row_group() when no row group is open (eof once the plan is exhausted)
+//     2. skip_current_row_group_rows() over the gap before the next selected range
+//     3. read_current_row_group_batch(batch_rows)
+// ============================================================================
+class ParquetScanScheduler {
+public:
+    static constexpr int64_t DEFAULT_READ_BATCH_SIZE = 4096;
+
+    // Installs the row groups to scan, clears the filtered-row counters and rewinds.
+    void set_plan(RowGroupScanPlan plan);
+    void set_page_skip_profile(ParquetPageSkipProfile page_skip_profile) {
+        _page_skip_profile = page_skip_profile;
+    }
+    void set_scan_profile(ParquetScanProfile scan_profile) { _scan_profile = scan_profile; }
+    // Required (checked when a row group is opened) if the request contains the global row id
+    // column.
+    void set_global_rowid_context(std::optional<format::GlobalRowIdContext> context) {
+        _global_rowid_context = context;
+    }
+    // Attaches a condition-cache context. On a cache hit the current plan is pruned by the
+    // cached bitmap; on a miss the bitmap is filled while batches are read.
+    void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx);
+    void set_timezone(const cctz::time_zone* timezone) { _timezone = timezone; }
+    void set_enable_strict_mode(bool enable_strict_mode) {
+        _enable_strict_mode = enable_strict_mode;
+    }
+    // Upper scanner owns adaptive memory feedback; scheduler only applies the current row cap when
+    // splitting selected row ranges into physical read batches.
+    // A batch size of 0 is treated as 1.
+    void set_batch_size(size_t batch_size) {
+        _batch_size = batch_size == 0 ? 1 : static_cast<int64_t>(batch_size);
+    }
+    // Rewinds to the first planned row group and drops the current row group state.
+    void reset();
+    // True when no row group is planned.
+    bool empty() const { return _row_group_plans.empty(); }
+    // Rows removed from the plan by a condition-cache hit.
+    int64_t condition_cache_filtered_rows() const { return _condition_cache_filtered_rows; }
+    // Rows removed by filter conjuncts while reading batches.
+    int64_t predicate_filtered_rows() const { return _predicate_filtered_rows; }
+
+    // Reads the next non-empty batch into `file_block`. `*rows` receives the number of rows
+    // produced; `*eof` is set to true once the plan is exhausted and to false otherwise.
+    Status read_next_batch(ParquetFileContext& file_context,
+                           const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                           const format::FileScanRequest& request, Block* file_block, size_t* rows,
+                           bool* eof);
+
+private:
+    // Clears all per-row-group state (readers, cursors, selected ranges).
+    void reset_current_row_group();
+
+    // Opens the next planned row group and creates its column readers; `*has_row_group` is
+    // false when the plan is exhausted.
+    Status open_next_row_group(ParquetFileContext& file_context,
+                               const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                               const format::FileScanRequest& request, bool* has_row_group);
+
+    // Skips `rows` rows in every column reader of the current row group.
+    Status skip_current_row_group_rows(int64_t rows);
+
+    // Reads the predicate columns of one batch and evaluates the conjuncts on them.
+    Status read_filter_columns(int64_t batch_rows, const format::FileScanRequest& request,
+                               Block* file_block, SelectionVector* selection,
+                               uint16_t* selected_rows, int64_t* conjunct_filtered_rows);
+
+    // Reads one physical batch: predicate columns, filtering, then non-predicate columns.
+    Status read_current_row_group_batch(int64_t batch_rows, const format::FileScanRequest& request,
+                                        int64_t batch_first_file_row, Block* file_block,
+                                        size_t* rows);
+
+    // On a condition-cache miss, marks the granules that contain surviving rows.
+    void mark_condition_cache_granules(const SelectionVector& selection, uint16_t selected_rows,
+                                       int64_t batch_first_file_row);
+
+    std::vector<RowGroupReadPlan> _row_group_plans; // row group queue to scan
+    size_t _next_row_group_plan_idx = 0;            // index of the next row group to process
+
+    std::shared_ptr<::parquet::RowGroupReader> _current_row_group; // Arrow RowGroup reader
+    std::map<ColumnId, std::unique_ptr<ParquetColumnReader>>
+            _current_predicate_columns; // predicate ColumnReaders
+    std::map<ColumnId, std::unique_ptr<ParquetColumnReader>>
+            _current_non_predicate_columns;   // non-predicate ColumnReaders
+    int64_t _current_row_group_rows = 0;      // current row group row count
+    int64_t _current_row_group_rows_read = 0; // rows read in the current row group (cursor)
+    int64_t _current_row_group_first_row = 0; // first file row of the current row group
+    std::vector<RowRange>
+            _current_selected_ranges; // selected ranges for the current row group after page-index pruning
+    size_t _current_range_idx = 0;        // current selected_range index
+    int64_t _current_range_rows_read = 0; // rows read in the current range
+
+    ParquetPageSkipProfile _page_skip_profile;
+    ParquetScanProfile _scan_profile;
+    std::optional<format::GlobalRowIdContext> _global_rowid_context;
+    const cctz::time_zone* _timezone = nullptr;
+    bool _enable_strict_mode = false;
+    int64_t _batch_size = DEFAULT_READ_BATCH_SIZE; // max rows per physical read batch
+    std::shared_ptr<ConditionCacheContext> _condition_cache_ctx;
+    int64_t _condition_cache_filtered_rows = 0; // rows pruned by a condition-cache hit
+    int64_t _predicate_filtered_rows = 0;       // rows removed by filter conjuncts
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_statistics.cpp b/be/src/format_v2/parquet/parquet_statistics.cpp
new file mode 100644
index 00000000000000..1207aecd8e4877
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_statistics.cpp
@@ -0,0 +1,1303 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_statistics.h"
+
+#include <parquet/api/reader.h>
+#include <parquet/bloom_filter.h>
+#include <parquet/bloom_filter_reader.h>
+#include <parquet/column_page.h>
+#include <parquet/encoding.h>
+#include <parquet/page_index.h>
+#include <parquet/statistics.h>
+#include <parquet/types.h>
+
+#include <algorithm>
+#include <cstddef>
+#include <cstring>
+#include <exception>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/config.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "core/field.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "runtime/runtime_profile.h"
+#include "storage/index/zone_map/zone_map_index.h"
+#include "storage/predicate/accept_null_predicate.h"
+#include "storage/predicate/column_predicate.h"
+
+namespace doris::format::parquet {
+
+namespace {
+
+enum class ParquetRowGroupPruneReason { // result of pruning a row group for one column filter
+    NONE,         // no pruning step excluded the row group, so it must be read
+    STATISTICS,   // excluded by the column-chunk min/max/null statistics
+    DICTIONARY,   // excluded because a predicate rejected the dictionary entries
+    BLOOM_FILTER, // excluded by the column bloom filter
+};
+
+// Picks the primitive type that the bloom-filter helpers in this file key their hashing on.
+// Only BOOLEAN, INT, BIGINT, FLOAT, DOUBLE and STRING are accepted; any other type, and a column
+// whose Doris type is not resolved, yields INVALID_TYPE.
+// NOTE(review): the choice is made from the Doris type rather than the parquet physical type -
+// confirm the two always agree for columns that can carry a bloom filter.
+PrimitiveType physical_filter_type(const ParquetColumnSchema& column_schema) {
+    if (column_schema.type == nullptr) {
+        return INVALID_TYPE;
+    }
+    // Nullability is irrelevant here, so inspect the nested type.
+    const PrimitiveType primitive_type = remove_nullable(column_schema.type)->get_primitive_type();
+    const bool accepted = primitive_type == TYPE_BOOLEAN || primitive_type == TYPE_INT ||
+                          primitive_type == TYPE_BIGINT || primitive_type == TYPE_FLOAT ||
+                          primitive_type == TYPE_DOUBLE || primitive_type == TYPE_STRING;
+    return accepted ? primitive_type : INVALID_TYPE;
+}
+
+// Converts the schema's parquet time unit into the decoder-side enum. Units other than MILLIS,
+// MICROS and NANOS are reported as UNKNOWN.
+DecodedTimeUnit decoded_time_unit(ParquetTimeUnit time_unit) {
+    if (time_unit == ParquetTimeUnit::MILLIS) {
+        return DecodedTimeUnit::MILLIS;
+    }
+    if (time_unit == ParquetTimeUnit::MICROS) {
+        return DecodedTimeUnit::MICROS;
+    }
+    const bool is_nanos = time_unit == ParquetTimeUnit::NANOS;
+    return is_nanos ? DecodedTimeUnit::NANOS : DecodedTimeUnit::UNKNOWN;
+}
+
+Status read_decoded_field(const ParquetColumnSchema& column_schema, DecodedColumnView view,
+                          Field* field, const cctz::time_zone* timezone) { // decodes one value
+    DORIS_CHECK(column_schema.type != nullptr); // the serde below is reached through this type
+    DORIS_CHECK(field != nullptr);
+    constexpr uint8_t value_present = 0; // one-entry null map for the single value
+    view.null_map = &value_present;
+    view.row_count = 1; // exactly one value is decoded per call
+    view.timezone = timezone;
+    view.time_unit = decoded_time_unit(column_schema.type_descriptor.time_unit);
+    view.timestamp_is_adjusted_to_utc = column_schema.type_descriptor.timestamp_is_adjusted_to_utc;
+    view.logical_integer_bit_width = column_schema.type_descriptor.integer_bit_width;
+    view.logical_integer_is_signed = !column_schema.type_descriptor.is_unsigned_integer;
+    view.decimal_precision = column_schema.type_descriptor.decimal_precision;
+    view.decimal_scale = column_schema.type_descriptor.decimal_scale;
+    view.fixed_length = column_schema.type_descriptor.fixed_length;
+    const auto& data_type = column_schema.type; // the type supplies the serde and is passed to it
+    return data_type->get_serde()->read_field_from_decoded_value(*data_type, field, view);
+}
+
+template <typename NativeType> // NativeType is the in-memory type of one statistics value
+bool set_decoded_field(const ParquetColumnSchema& column_schema, DecodedValueKind value_kind,
+                       const NativeType& value, Field* field, const cctz::time_zone* timezone) {
+    DecodedColumnView single_value; // exposes `value` to the serde as a one-element raw buffer
+    single_value.values = reinterpret_cast<const uint8_t*>(std::addressof(value));
+    single_value.value_kind = value_kind;
+    return read_decoded_field(column_schema, single_value, field, timezone).ok(); // error -> false
+}
+
+// Decodes typed min and max statistics into `column_statistics`; false if either decode fails.
+template <typename ParquetDType>
+bool set_decoded_min_max(const std::shared_ptr<::parquet::Statistics>& statistics,
+                         const ParquetColumnSchema& column_schema, DecodedValueKind value_kind,
+                         ParquetColumnStatistics* column_statistics,
+                         const cctz::time_zone* timezone) {
+    // The cast is unchecked: `statistics` must really hold ParquetDType values.
+    const auto typed =
+            std::static_pointer_cast<::parquet::TypedStatistics<ParquetDType>>(statistics);
+    // && short-circuits, so max is only decoded once min has succeeded.
+    return set_decoded_field(column_schema, value_kind, typed->min(),
+                             &column_statistics->min_value, timezone) &&
+           set_decoded_field(column_schema, value_kind, typed->max(),
+                             &column_statistics->max_value, timezone);
+}
+
+bool set_decoded_binary_field(const ParquetColumnSchema& column_schema, DecodedValueKind value_kind,
+                              const StringRef& value, Field* field,
+                              const cctz::time_zone* timezone) {
+    std::vector<StringRef> single_value(1, value); // the view reads binary data from a vector
+    DecodedColumnView binary_view;
+    binary_view.binary_values = &single_value; // must stay alive until the decode call returns
+    binary_view.value_kind = value_kind;
+    return read_decoded_field(column_schema, binary_view, field, timezone).ok(); // error -> false
+}
+
+// Decodes the min/max statistics of a BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY column into
+// `column_statistics`. Returns false for every other physical type, for a fixed-length column
+// that has no descriptor or a non-positive type length, and when either bound fails to decode.
+// A failed decode may leave min_value already overwritten.
+bool set_string_min_max(const std::shared_ptr<::parquet::Statistics>& statistics,
+                        const ParquetColumnSchema& column_schema,
+                        ParquetColumnStatistics* column_statistics,
+                        const cctz::time_zone* timezone) {
+    // Writes both bounds; the upper bound is only attempted after the lower one decoded.
+    const auto store_bounds = [&](DecodedValueKind kind, const std::string& lower,
+                                  const std::string& upper) {
+        return set_decoded_binary_field(column_schema, kind,
+                                        StringRef(lower.data(), lower.size()),
+                                        &column_statistics->min_value, timezone) &&
+               set_decoded_binary_field(column_schema, kind,
+                                        StringRef(upper.data(), upper.size()),
+                                        &column_statistics->max_value, timezone);
+    };
+
+    const auto physical_type = statistics->physical_type();
+    if (physical_type == ::parquet::Type::BYTE_ARRAY) {
+        const auto typed =
+                std::static_pointer_cast<::parquet::TypedStatistics<::parquet::ByteArrayType>>(
+                        statistics);
+        // Copy both values into local strings before handing out StringRefs to them.
+        const std::string lower = ::parquet::ByteArrayToString(typed->min());
+        const std::string upper = ::parquet::ByteArrayToString(typed->max());
+        return store_bounds(DecodedValueKind::BINARY, lower, upper);
+    }
+
+    if (physical_type == ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+        if (column_schema.descriptor == nullptr || column_schema.descriptor->type_length() <= 0) {
+            return false;
+        }
+        const auto typed =
+                std::static_pointer_cast<::parquet::TypedStatistics<::parquet::FLBAType>>(
+                        statistics);
+        // The value length comes from the schema's type_length.
+        const int type_length = column_schema.descriptor->type_length();
+        const std::string lower(reinterpret_cast<const char*>(typed->min().ptr), type_length);
+        const std::string upper(reinterpret_cast<const char*>(typed->max().ptr), type_length);
+        return store_bounds(DecodedValueKind::FIXED_BINARY, lower, upper);
+    }
+
+    // Any other physical type is not handled here.
+    return false;
+}
+
+bool is_null_only_predicate(const ColumnPredicate& predicate) { // IS NULL or IS NOT NULL
+    const auto kind = predicate.type();
+    return kind == PredicateType::IS_NULL || kind == PredicateType::IS_NOT_NULL;
+}
+
+// Tells whether `predicate` can be checked against a dictionary page. Only the equality-style
+// kinds EQ and IN_LIST qualify; every other predicate type is rejected.
+bool is_supported_dictionary_predicate(const ColumnPredicate& predicate) {
+    const auto kind = predicate.type();
+    if (kind == PredicateType::EQ) {
+        return true;
+    }
+    return kind == PredicateType::IN_LIST;
+}
+
+// AcceptNullPredicate instances and IS [NOT] NULL predicates are rejected up front; anything else
+// is prunable when the predicate itself reports can_do_bloom_filter(false).
+bool is_bloom_filter_prunable_predicate(const ColumnPredicate& predicate) {
+    const bool accepts_null = dynamic_cast<const AcceptNullPredicate*>(&predicate) != nullptr;
+    return !accepts_null && !is_null_only_predicate(predicate) &&
+           predicate.can_do_bloom_filter(false);
+}
+
+template <typename T> // T is copied bytewise, so it has to be trivially copyable
+T load_predicate_value(const char* data) {
+    T loaded; // fully overwritten by the copy below
+    std::memcpy(&loaded, data, sizeof(loaded));
+    return loaded;
+}
+
+// Read-only adapter that lets Doris column predicates probe an Arrow parquet bloom filter through
+// the segment_v2::BloomFilter interface. Predicate values arrive as raw bytes and are hashed with
+// the Arrow filter's own Hash() overloads. Whenever a value cannot be interpreted (null buffer,
+// unsupported column type, unexpected byte width) the answer is "may contain", so nothing is
+// pruned on its account.
+// Both constructor arguments are stored as references and must outlive the adapter.
+// The adapter never modifies the wrapped filter.
+class ArrowParquetBloomFilterAdapter final : public segment_v2::BloomFilter {
+public:
+    ArrowParquetBloomFilterAdapter(const ParquetColumnSchema& column_schema,
+                                   const ::parquet::BloomFilter& bloom_filter)
+            : _column_schema(column_schema), _bloom_filter(bloom_filter) {}
+
+    // The wrapped filter is only ever read: inserting values is a programming error.
+    void add_bytes(const char* buf, size_t size) override { DORIS_CHECK(false); }
+    void add_hash(uint64_t hash) override { DORIS_CHECK(false); }
+
+    // Nulls are not tracked by this adapter; only has_null == false may be set.
+    void set_has_null(bool has_null) override { DORIS_CHECK(!has_null); }
+    bool has_null() const override { return false; }
+
+    // Probes a hash that the caller has already computed.
+    bool test_hash(uint64_t hash) const override { return _bloom_filter.FindHash(hash); }
+
+    // Probes one predicate value given as `size` raw bytes. The column's filter type decides how
+    // the bytes are interpreted. Returns false only when the filter rules the value out.
+    bool test_bytes(const char* buf, size_t size) const override {
+        if (buf == nullptr) {
+            return true;
+        }
+        switch (physical_filter_type(_column_schema)) {
+        case TYPE_BOOLEAN:
+            return test_boolean(buf, size);
+        case TYPE_INT:
+            return test_int32(buf, size);
+        case TYPE_BIGINT:
+            return probe_exact<int64_t>(buf, size);
+        case TYPE_FLOAT:
+            return probe_exact<float>(buf, size);
+        case TYPE_DOUBLE:
+            return probe_exact<double>(buf, size);
+        case TYPE_STRING:
+            return test_string(buf, size);
+        default:
+            return true;
+        }
+    }
+
+private:
+    // Hashes `value` with the Arrow Hash() overload selected by T, then probes the filter.
+    template <typename T>
+    bool probe(T value) const {
+        return _bloom_filter.FindHash(_bloom_filter.Hash(value));
+    }
+
+    // Fixed-width probe: a byte count different from sizeof(T) means "cannot tell".
+    template <typename T>
+    bool probe_exact(const char* buf, size_t size) const {
+        return size != sizeof(T) || probe<T>(load_predicate_value<T>(buf));
+    }
+
+    // Booleans are probed as the int32 values 0 and 1; the predicate value may be stored either
+    // as a bool or as an int32.
+    bool test_boolean(const char* buf, size_t size) const {
+        if (size == sizeof(bool)) {
+            return probe<int32_t>(load_predicate_value<bool>(buf) ? 1 : 0);
+        }
+        if (size == sizeof(int32_t)) {
+            return probe<int32_t>(load_predicate_value<int32_t>(buf) != 0 ? 1 : 0);
+        }
+        return true;
+    }
+
+    // Values stored in 1, 2 or 4 bytes are widened to int32 before hashing.
+    bool test_int32(const char* buf, size_t size) const {
+        switch (size) {
+        case sizeof(int8_t):
+            return probe<int32_t>(load_predicate_value<int8_t>(buf));
+        case sizeof(int16_t):
+            return probe<int32_t>(load_predicate_value<int16_t>(buf));
+        case sizeof(int32_t):
+            return probe<int32_t>(load_predicate_value<int32_t>(buf));
+        default:
+            return true;
+        }
+    }
+
+    // Strings are hashed as a parquet ByteArray over the caller's bytes (no copy is made).
+    bool test_string(const char* buf, size_t size) const {
+        const ::parquet::ByteArray bytes(static_cast<uint32_t>(size),
+                                         reinterpret_cast<const uint8_t*>(buf));
+        return _bloom_filter.FindHash(_bloom_filter.Hash(&bytes));
+    }
+
+    // Non-owning: both referenced objects belong to the caller.
+    const ParquetColumnSchema& _column_schema;
+    const ::parquet::BloomFilter& _bloom_filter;
+};
+
+const ParquetColumnSchema* resolve_predicate_leaf_schema(
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& schema,
+        const format::FileColumnPredicateFilter& column_filter);
+
+// Reports whether bloom-filter pruning can be attempted for the column. The accepted filter types
+// are BOOLEAN, INT, BIGINT, FLOAT, DOUBLE and STRING - the ones ArrowParquetBloomFilterAdapter
+// knows how to hash. A column that physical_filter_type() maps to anything else (including
+// INVALID_TYPE) is not supported.
+bool bloom_filter_supported(const ParquetColumnSchema& column_schema) {
+    const PrimitiveType filter_type = physical_filter_type(column_schema);
+    // Grouped only for readability; each group is a plain membership test.
+    const bool is_bool_or_int = filter_type == TYPE_BOOLEAN || filter_type == TYPE_INT ||
+                                filter_type == TYPE_BIGINT;
+    const bool is_floating = filter_type == TYPE_FLOAT || filter_type == TYPE_DOUBLE;
+    const bool is_string = filter_type == TYPE_STRING;
+    return is_bool_or_int || is_floating || is_string;
+}
+
+// True only when some predicate's evaluate_and() against the bloom filter adapter returns false.
+// An unsupported column, a null entry or a non-prunable predicate ends with "cannot tell".
+bool bloom_filter_excludes(const ParquetColumnSchema& column_schema,
+                           const format::FileColumnPredicateFilter& column_filter,
+                           const ::parquet::BloomFilter& bloom_filter) {
+    if (!bloom_filter_supported(column_schema)) {
+        return false;
+    }
+    ArrowParquetBloomFilterAdapter adapter(column_schema, bloom_filter);
+    for (const auto& pred : column_filter.predicates) {
+        const bool prunable = pred != nullptr && is_bloom_filter_prunable_predicate(*pred);
+        if (!prunable || !pred->evaluate_and(&adapter)) {
+            return prunable; // false: cannot tell; true: the filter rejected this predicate
+        }
+    }
+    return false;
+}
+
+// Cache of column bloom filters. Each leaf column is fetched at most once per cache instance; a
+// column whose fetch failed or produced no filter keeps answering nullptr.
+struct RowGroupBloomFilterCache {
+    // Arrow reader used to fetch filters; nullptr disables the cache entirely.
+    ::parquet::BloomFilterReader* bloom_filter_reader = nullptr;
+    // Filters fetched so far, keyed by leaf column id only.
+    std::map<int, std::unique_ptr<::parquet::BloomFilter>> column_bloom_filters;
+    // Leaf columns for which a fetch has already been attempted, successful or not.
+    std::set<int> loaded_columns;
+
+    // Returns the bloom filter of `leaf_column_id` in `row_group_idx`, or nullptr when it is
+    // unavailable. The read time is added to pruning_stats->bloom_filter_read_time if provided.
+    ::parquet::BloomFilter* get(int row_group_idx, int leaf_column_id,
+                                ParquetPruningStats* pruning_stats) {
+        if (bloom_filter_reader == nullptr || leaf_column_id < 0) {
+            return nullptr;
+        }
+        // insert().second is true only for the first request of this column.
+        if (loaded_columns.insert(leaf_column_id).second) {
+            int64_t unused_read_time = 0; // timer target when the caller passes no stats
+            try {
+                SCOPED_RAW_TIMER(pruning_stats == nullptr ? &unused_read_time
+                                                          : &pruning_stats->bloom_filter_read_time);
+                auto row_group_reader = bloom_filter_reader->RowGroup(row_group_idx);
+                if (row_group_reader != nullptr) {
+                    column_bloom_filters[leaf_column_id] =
+                            row_group_reader->GetColumnBloomFilter(leaf_column_id);
+                }
+            } catch (const ::parquet::ParquetException&) {
+                return nullptr;
+            } catch (const std::exception&) {
+                return nullptr;
+            }
+        }
+        const auto it = column_bloom_filters.find(leaf_column_id);
+        return it == column_bloom_filters.end() ? nullptr : it->second.get();
+    }
+};
+
+// Tries to prune one row group with the bloom filter of the filter's leaf column; NONE = keep.
+ParquetRowGroupPruneReason bloom_filter_prune_reason(
+        int row_group_idx, const std::vector<std::unique_ptr<ParquetColumnSchema>>& schema,
+        const format::FileColumnPredicateFilter& column_filter,
+        RowGroupBloomFilterCache* bloom_filter_cache, ParquetPruningStats* pruning_stats) {
+    if (bloom_filter_cache == nullptr || column_filter.predicates.empty()) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    const auto* leaf_schema = resolve_predicate_leaf_schema(schema, column_filter);
+    if (leaf_schema == nullptr || !bloom_filter_supported(*leaf_schema)) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    if (!std::ranges::all_of(column_filter.predicates, [](const auto& predicate) {
+            return predicate != nullptr && is_bloom_filter_prunable_predicate(*predicate);
+        })) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    auto* bloom_filter =
+            bloom_filter_cache->get(row_group_idx, leaf_schema->leaf_column_id, pruning_stats);
+    if (bloom_filter == nullptr ||
+        !bloom_filter_excludes(*leaf_schema, column_filter, *bloom_filter)) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    return ParquetRowGroupPruneReason::BLOOM_FILTER;
+}
+
+bool is_dictionary_data_encoding(::parquet::Encoding::type encoding) { // either dictionary id
+    using Encoding = ::parquet::Encoding;
+    return encoding == Encoding::PLAIN_DICTIONARY || encoding == Encoding::RLE_DICTIONARY;
+}
+
+bool is_level_encoding(::parquet::Encoding::type encoding) { // RLE or BIT_PACKED
+    return encoding == ::parquet::Encoding::BIT_PACKED || encoding == ::parquet::Encoding::RLE;
+}
+
+bool is_data_page_type(::parquet::PageType::type page_type) { // DATA_PAGE or DATA_PAGE_V2
+    using PageType = ::parquet::PageType;
+    return page_type == PageType::DATA_PAGE || page_type == PageType::DATA_PAGE_V2;
+}
+
+// Decides from metadata alone whether every data page of the column chunk is dictionary encoded.
+// A chunk without a dictionary page never qualifies. Page-level encoding statistics are
+// preferred; when none are recorded, the chunk's encoding list is used instead and may only name
+// dictionary and level encodings.
+bool is_dictionary_encoded_chunk(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    if (!column_metadata.has_dictionary_page()) {
+        return false;
+    }
+
+    const auto& encoding_stats = column_metadata.encoding_stats();
+    if (encoding_stats.empty()) {
+        // Fallback: at least one dictionary encoding, and nothing besides level encodings.
+        const auto& encodings = column_metadata.encodings();
+        const bool nothing_else = std::ranges::all_of(encodings, [](const auto encoding) {
+            return is_dictionary_data_encoding(encoding) || is_level_encoding(encoding);
+        });
+        return nothing_else && std::ranges::any_of(encodings, is_dictionary_data_encoding);
+    }
+
+    // Only data pages that actually occur (count > 0) are relevant; a single one that is not
+    // dictionary encoded disqualifies the chunk, and at least one relevant page must exist.
+    // Entries for other page types are ignored.
+    bool saw_data_page = false;
+    for (const auto& stat : encoding_stats) {
+        const bool relevant = is_data_page_type(stat.page_type) && stat.count > 0;
+        if (relevant && !is_dictionary_data_encoding(stat.encoding)) {
+            return false;
+        }
+        saw_data_page = saw_data_page || relevant;
+    }
+    return saw_data_page;
+}
+
+// Dictionary pruning is limited to primitive, string-like columns that are physically stored as
+// BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY, and to filters made solely of EQ / IN_LIST predicates.
+bool supports_dictionary_pruning(const ParquetColumnSchema& column_schema,
+                                 const ::parquet::ColumnChunkMetaData& column_metadata,
+                                 const format::FileColumnPredicateFilter& column_filter) {
+    const bool is_string_leaf = column_schema.kind == ParquetColumnSchemaKind::PRIMITIVE &&
+                                column_schema.descriptor != nullptr &&
+                                column_schema.type != nullptr &&
+                                column_schema.type_descriptor.is_string_like;
+    if (!is_string_leaf) {
+        return false;
+    }
+    const auto physical_type = column_metadata.type();
+    if (physical_type != ::parquet::Type::BYTE_ARRAY &&
+        physical_type != ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+        return false;
+    }
+    return std::ranges::all_of(column_filter.predicates, [](const auto& predicate) {
+        return predicate != nullptr && is_supported_dictionary_predicate(*predicate);
+    });
+}
+
+struct OwnedDictionaryWords {
+    std::vector<std::string> values; // owned copies of the dictionary entries
+    std::vector<StringRef> refs;     // one view per entry of `values`, filled by build_refs()
+    void clear() {
+        values.clear();
+        refs.clear();
+    }
+    // Rebuilds `refs` from scratch, so calling it again cannot append duplicate views.
+    void build_refs() {
+        refs.clear();
+        refs.reserve(values.size());
+        for (const auto& value : values) {
+            refs.emplace_back(value.data(), value.size());
+        }
+    }
+};
+
+// Best-effort read of the dictionary page of one BYTE_ARRAY / FIXED_LEN_BYTE_ARRAY column chunk
+// into `dict_words`. Returns false whenever the dictionary cannot be obtained (the contents of
+// `dict_words` are then unspecified). Every Arrow call sits inside the try block because Arrow
+// reports I/O and decode failures by throwing; ParquetException derives from std::exception.
+bool read_dictionary_words(::parquet::ParquetFileReader* file_reader, int row_group_idx,
+                           int leaf_column_id, const ParquetColumnSchema& column_schema,
+                           OwnedDictionaryWords* dict_words) {
+    DORIS_CHECK(dict_words != nullptr);
+    dict_words->clear();
+    if (file_reader == nullptr || leaf_column_id < 0 || column_schema.descriptor == nullptr) {
+        return false;
+    }
+    try {
+        auto row_group_reader = file_reader->RowGroup(row_group_idx);
+        if (row_group_reader == nullptr) {
+            return false;
+        }
+        auto page_reader = row_group_reader->GetColumnPageReader(leaf_column_id);
+        if (page_reader == nullptr) {
+            return false;
+        }
+        // Only the first page is inspected; it must be the dictionary page.
+        const std::shared_ptr<::parquet::Page> page = page_reader->NextPage();
+        if (page == nullptr || page->type() != ::parquet::PageType::DICTIONARY_PAGE) {
+            return false;
+        }
+        const auto* dictionary_page = static_cast<const ::parquet::DictionaryPage*>(page.get());
+        if (dictionary_page->encoding() != ::parquet::Encoding::PLAIN &&
+            dictionary_page->encoding() != ::parquet::Encoding::PLAIN_DICTIONARY) {
+            return false;
+        }
+        const int32_t dictionary_length = dictionary_page->num_values();
+        if (dictionary_length <= 0) {
+            return false;
+        }
+        const auto* dictionary_data = dictionary_page->data();
+        const int dictionary_size = dictionary_page->size();
+        dict_words->values.reserve(static_cast<size_t>(dictionary_length));
+        if (column_schema.descriptor->physical_type() == ::parquet::Type::BYTE_ARRAY) {
+            auto decoder = ::parquet::MakeTypedDecoder<::parquet::ByteArrayType>(
+                    ::parquet::Encoding::PLAIN, column_schema.descriptor);
+            decoder->SetData(dictionary_length, dictionary_data, dictionary_size);
+            std::vector<::parquet::ByteArray> byte_array_values(
+                    static_cast<size_t>(dictionary_length));
+            if (decoder->Decode(byte_array_values.data(), dictionary_length) !=
+                dictionary_length) {
+                return false;
+            }
+            for (const auto& byte_array : byte_array_values) {
+                dict_words->values.emplace_back(reinterpret_cast<const char*>(byte_array.ptr),
+                                                byte_array.len);
+            }
+            dict_words->build_refs();
+            return true;
+        }
+        if (column_schema.descriptor->physical_type() == ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+            const int type_length = column_schema.descriptor->type_length();
+            if (type_length <= 0) {
+                return false;
+            }
+            auto decoder = ::parquet::MakeTypedDecoder<::parquet::FLBAType>(
+                    ::parquet::Encoding::PLAIN, column_schema.descriptor);
+            decoder->SetData(dictionary_length, dictionary_data, dictionary_size);
+            std::vector<::parquet::FixedLenByteArray> flba_values(
+                    static_cast<size_t>(dictionary_length));
+            if (decoder->Decode(flba_values.data(), dictionary_length) != dictionary_length) {
+                return false;
+            }
+            for (const auto& flba : flba_values) {
+                dict_words->values.emplace_back(reinterpret_cast<const char*>(flba.ptr),
+                                                type_length);
+            }
+            dict_words->build_refs();
+            return true;
+        }
+    } catch (const std::exception&) {
+        return false;
+    }
+    return false;
+}
+
+segment_v2::ZoneMap to_column_predicate_statistics(const ParquetColumnStatistics& statistics) {
+    segment_v2::ZoneMap zone_map; // the statistics shape handed to ColumnPredicate::evaluate_and
+    zone_map.has_null = statistics.has_null;
+    zone_map.has_not_null = statistics.has_not_null;
+    zone_map.min_value = statistics.min_value; // copied even when no min/max was decoded
+    zone_map.max_value = statistics.max_value;
+    return zone_map;
+}
+
+const ParquetColumnSchema* find_child_schema_by_local_id(const ParquetColumnSchema& column_schema,
+                                                         int32_t local_id) {
+    const auto& children = column_schema.children; // null entries are skipped; first match wins
+    const auto hit = std::find_if(children.begin(), children.end(), [local_id](const auto& child) {
+        return child != nullptr && child->local_id == local_id;
+    });
+    return hit != children.end() ? hit->get() : nullptr;
+}
+
+// Walks from the filter's top-level file column down its child id path and returns the schema
+// node reached, but only if it is a primitive, non-repeated leaf; otherwise nullptr.
+const ParquetColumnSchema* resolve_predicate_leaf_schema(
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& schema,
+        const format::FileColumnPredicateFilter& column_filter) {
+    const auto file_column_id = column_filter.effective_file_column_id();
+    if (!file_column_id.is_valid() || file_column_id.value() >= static_cast<int>(schema.size())) {
+        return nullptr;
+    }
+    const ParquetColumnSchema* node = schema[file_column_id.value()].get();
+    if (node == nullptr) {
+        return nullptr;
+    }
+    for (const auto child_local_id : column_filter.effective_file_child_id_path()) {
+        node = find_child_schema_by_local_id(*node, child_local_id);
+        if (node == nullptr) {
+            return nullptr;
+        }
+    }
+    const bool prunable_leaf = node->kind == ParquetColumnSchemaKind::PRIMITIVE &&
+                               node->leaf_column_id >= 0 && node->max_repetition_level <= 0;
+    return prunable_leaf ? node : nullptr;
+}
+
+// True when the statistics prove no row can match. Null predicate entries are skipped, and value
+// predicates are skipped when non-null rows exist without decoded min/max (bounds would be unset).
+bool check_statistics(const format::FileColumnPredicateFilter& column_filter,
+                      const ParquetColumnStatistics& statistics) {
+    if (!statistics.has_any_statistics()) {
+        return false;
+    }
+    const bool missing_bounds = statistics.has_not_null && !statistics.has_min_max;
+    for (const auto& column_predicate : column_filter.predicates) {
+        if (column_predicate == nullptr ||
+            (is_null_only_predicate(*column_predicate) ? !statistics.has_null_count
+                                                       : missing_bounds)) {
+            continue;
+        }
+        if (!column_predicate->evaluate_and(to_column_predicate_statistics(statistics))) {
+            return true;
+        }
+    }
+    return false;
+}
+
+} // namespace
+
+// Maps Arrow statistics to Doris pruning statistics; min/max are decoded per physical type.
+ParquetColumnStatistics ParquetStatisticsUtils::TransformColumnStatistics(
+        const ParquetColumnSchema& column_schema,
+        const std::shared_ptr<::parquet::Statistics>& statistics, const cctz::time_zone* timezone) {
+    ParquetColumnStatistics result;
+    if (statistics == nullptr) {
+        return result;
+    }
+    result.has_null_count = statistics->HasNullCount();
+    result.has_null = result.has_null_count && statistics->null_count() > 0;
+    result.has_not_null = statistics->num_values() > 0 || statistics->HasMinMax();
+    if (!statistics->HasMinMax()) { // has_not_null is always true once min/max exist
+        return result;
+    }
+    DORIS_CHECK(column_schema.type != nullptr);
+    switch (statistics->physical_type()) {
+    case ::parquet::Type::BOOLEAN:
+        result.has_min_max = set_decoded_min_max<::parquet::BooleanType>(
+                statistics, column_schema, DecodedValueKind::BOOL, &result, timezone);
+        break;
+    case ::parquet::Type::INT32:
+        result.has_min_max = set_decoded_min_max<::parquet::Int32Type>(
+                statistics, column_schema, decoded_value_kind(column_schema.type_descriptor),
+                &result, timezone);
+        break;
+    case ::parquet::Type::INT64:
+        result.has_min_max = set_decoded_min_max<::parquet::Int64Type>(
+                statistics, column_schema, decoded_value_kind(column_schema.type_descriptor),
+                &result, timezone);
+        break;
+    case ::parquet::Type::FLOAT:
+        result.has_min_max = set_decoded_min_max<::parquet::FloatType>(
+                statistics, column_schema, DecodedValueKind::FLOAT, &result, timezone);
+        break;
+    case ::parquet::Type::DOUBLE:
+        result.has_min_max = set_decoded_min_max<::parquet::DoubleType>(
+                statistics, column_schema, DecodedValueKind::DOUBLE, &result, timezone);
+        break;
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        result.has_min_max = set_string_min_max(statistics, column_schema, &result, timezone);
+        break;
+    default: // physical types not listed above keep has_min_max at its default
+        break;
+    }
+    return result;
+}
+
+namespace {
+
+// Decides whether one row group can be skipped for one column filter. The checks run in this
+// order: column-chunk statistics, then the dictionary page (string-like, fully
+// dictionary-encoded chunks only), then the bloom filter. NONE means the row group must be read.
+ParquetRowGroupPruneReason row_group_prune_reason(
+        const ::parquet::RowGroupMetaData& row_group, ::parquet::ParquetFileReader* file_reader,
+        int row_group_idx, const std::vector<std::unique_ptr<ParquetColumnSchema>>& schema,
+        const format::FileColumnPredicateFilter& column_filter,
+        RowGroupBloomFilterCache* bloom_filter_cache, ParquetPruningStats* pruning_stats,
+        const cctz::time_zone* timezone) {
+    if (column_filter.predicates.empty()) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    const auto* column_schema = resolve_predicate_leaf_schema(schema, column_filter);
+    if (column_schema == nullptr) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    DCHECK_LT(column_schema->leaf_column_id, row_group.num_columns());
+    auto column_chunk = row_group.ColumnChunk(column_schema->leaf_column_id);
+    if (column_chunk == nullptr) {
+        return ParquetRowGroupPruneReason::NONE;
+    }
+    const auto chunk_statistics = ParquetStatisticsUtils::TransformColumnStatistics(
+            *column_schema, column_chunk->statistics(), timezone);
+    if (check_statistics(column_filter, chunk_statistics)) {
+        return ParquetRowGroupPruneReason::STATISTICS;
+    }
+    if (supports_dictionary_pruning(*column_schema, *column_chunk, column_filter) &&
+        is_dictionary_encoded_chunk(*column_chunk)) {
+        OwnedDictionaryWords dict_words;
+        if (read_dictionary_words(file_reader, row_group_idx, column_schema->leaf_column_id,
+                                  *column_schema, &dict_words)) {
+            for (const auto& predicate : column_filter.predicates) {
+                if (!predicate->evaluate_and(dict_words.refs.data(), dict_words.refs.size())) {
+                    return ParquetRowGroupPruneReason::DICTIONARY;
+                }
+            }
+        }
+    }
+    // Dictionary pruning was not applicable or not conclusive.
+    return bloom_filter_prune_reason(row_group_idx, schema, column_filter, bloom_filter_cache,
+                                     pruning_stats);
+}
+
+// Points the cache at the file's bloom filter reader; resets it to nullptr if that throws.
+void init_bloom_filter_cache(::parquet::ParquetFileReader* file_reader, bool enable_bloom_filter,
+                             RowGroupBloomFilterCache* bloom_filter_cache) {
+    DORIS_CHECK(bloom_filter_cache != nullptr);
+    if (!enable_bloom_filter || file_reader == nullptr) {
+        return;
+    }
+    try {
+        bloom_filter_cache->bloom_filter_reader = &file_reader->GetBloomFilterReader();
+    } catch (const std::exception&) {
+        // Also covers ::parquet::ParquetException, which derives from std::exception.
+        bloom_filter_cache->bloom_filter_reader = nullptr;
+    }
+}
+
+// Row-group level pruning: keeps each candidate row group unless one column predicate filter
+// reports a prune reason. A null candidate_row_groups makes every row group a candidate.
+Status select_row_groups(const ::parquet::FileMetaData& metadata,
+                         ::parquet::ParquetFileReader* file_reader,
+                         const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                         const format::FileScanRequest& request,
+                         const std::vector<int>* candidate_row_groups,
+                         std::vector<int>* selected_row_groups, bool enable_bloom_filter,
+                         ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) {
+    int64_t row_group_filter_time_sink = 0;
+    SCOPED_RAW_TIMER(pruning_stats == nullptr ? &row_group_filter_time_sink
+                                              : &pruning_stats->row_group_filter_time);
+    if (selected_row_groups == nullptr) {
+        return Status::InvalidArgument("selected_row_groups is null");
+    }
+    selected_row_groups->clear();
+
+    const int num_row_groups = metadata.num_row_groups();
+    if (pruning_stats != nullptr) {
+        pruning_stats->total_row_groups = num_row_groups;
+    }
+    const auto candidate_size = candidate_row_groups == nullptr
+                                        ? static_cast<size_t>(num_row_groups)
+                                        : candidate_row_groups->size();
+    selected_row_groups->reserve(candidate_size);
+    for (size_t candidate_idx = 0; candidate_idx < candidate_size; ++candidate_idx) {
+        const int row_group_idx = candidate_row_groups == nullptr
+                                          ? static_cast<int>(candidate_idx)
+                                          : (*candidate_row_groups)[candidate_idx];
+        DORIS_CHECK(row_group_idx >= 0);
+        DORIS_CHECK(row_group_idx < num_row_groups);
+        auto row_group = metadata.RowGroup(row_group_idx);
+        if (row_group == nullptr) { // no metadata to prune with: keep the row group
+            selected_row_groups->push_back(row_group_idx);
+            continue;
+        }
+        bool pruned = false;
+        RowGroupBloomFilterCache bloom_filter_cache;
+        init_bloom_filter_cache(file_reader, enable_bloom_filter, &bloom_filter_cache);
+        for (const auto& column_filter : request.column_predicate_filters) {
+            const auto prune_reason = row_group_prune_reason(
+                    *row_group, file_reader, row_group_idx, file_schema, column_filter,
+                    &bloom_filter_cache, pruning_stats, timezone);
+            if (prune_reason == ParquetRowGroupPruneReason::NONE) {
+                continue;
+            }
+            pruned = true;
+            if (pruning_stats != nullptr) {
+                pruning_stats->filtered_group_rows += row_group->num_rows();
+                if (prune_reason == ParquetRowGroupPruneReason::STATISTICS) {
+                    ++pruning_stats->filtered_row_groups_by_statistics;
+                } else if (prune_reason == ParquetRowGroupPruneReason::DICTIONARY) {
+                    ++pruning_stats->filtered_row_groups_by_dictionary;
+                } else if (prune_reason == ParquetRowGroupPruneReason::BLOOM_FILTER) {
+                    ++pruning_stats->filtered_row_groups_by_bloom_filter;
+                }
+            }
+            break; // the first filter that prunes settles the outcome and its reason
+        }
+        if (!pruned) {
+            selected_row_groups->push_back(row_group_idx);
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+bool ParquetStatisticsUtils::BloomFilterExcludes(
+        const ParquetColumnSchema& column_schema,
+        const format::FileColumnPredicateFilter& column_filter,
+        const ::parquet::BloomFilter& bloom_filter) {
+    return bloom_filter_excludes(column_schema, column_filter, bloom_filter); // plain forwarder
+}
+
+Status select_row_groups_by_statistics(
+        const ::parquet::FileMetaData& metadata, ::parquet::ParquetFileReader* file_reader,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, const std::vector<int>* candidate_row_groups,
+        std::vector<int>* selected_row_groups, bool enable_bloom_filter,
+        ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) { // thin forwarder
+    return select_row_groups(metadata, file_reader, file_schema, request, candidate_row_groups,
+                             selected_row_groups, enable_bloom_filter, pruning_stats, timezone);
+}
+
+namespace {
+
+template <typename ParquetDType>
+bool set_page_decoded_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index,
+                              const ParquetColumnSchema& column_schema, size_t page_idx,
+                              DecodedValueKind value_kind, ParquetColumnStatistics* page_statistics,
+                              const cctz::time_zone* timezone) { // fills min/max for one page
+    const auto typed_index = // unchecked cast: the caller picks ParquetDType
+            std::static_pointer_cast<::parquet::TypedColumnIndex<ParquetDType>>(column_index);
+    if (page_idx >= typed_index->min_values().size() ||
+        page_idx >= typed_index->max_values().size()) {
+        return false; // no min/max entry for this page
+    }
+    if (!set_decoded_field(column_schema, value_kind, typed_index->min_values()[page_idx],
+                           &page_statistics->min_value, timezone) ||
+        !set_decoded_field(column_schema, value_kind, typed_index->max_values()[page_idx],
+                           &page_statistics->max_value, timezone)) {
+        return false; // a bound could not be converted
+    }
+    page_statistics->has_min_max = true; // both bounds were converted
+    return true;
+}
+
+bool set_page_string_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index,
+                             const ParquetColumnSchema& column_schema, size_t page_idx,
+                             ParquetColumnStatistics* page_statistics,
+                             const cctz::time_zone* timezone) { // min/max of binary leaves
+    switch (column_schema.descriptor->physical_type()) {
+    case ::parquet::Type::BYTE_ARRAY: {
+        const auto typed_index =
+                std::static_pointer_cast<::parquet::ByteArrayColumnIndex>(column_index);
+        if (page_idx >= typed_index->min_values().size() ||
+            page_idx >= typed_index->max_values().size()) {
+            return false; // no min/max entry for this page
+        }
+        const auto min = ::parquet::ByteArrayToString(typed_index->min_values()[page_idx]);
+        const auto max = ::parquet::ByteArrayToString(typed_index->max_values()[page_idx]);
+        if (!set_decoded_binary_field(column_schema, DecodedValueKind::BINARY,
+                                      StringRef(min.data(), min.size()),
+                                      &page_statistics->min_value, timezone) ||
+            !set_decoded_binary_field(column_schema, DecodedValueKind::BINARY,
+                                      StringRef(max.data(), max.size()),
+                                      &page_statistics->max_value, timezone)) {
+            return false; // a bound could not be converted
+        }
+        page_statistics->has_min_max = true;
+        return true;
+    }
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY: {
+        const int type_length = column_schema.descriptor->type_length();
+        if (type_length <= 0) {
+            return false; // cannot size the fixed-length values
+        }
+        const auto typed_index = std::static_pointer_cast<::parquet::FLBAColumnIndex>(column_index);
+        if (page_idx >= typed_index->min_values().size() ||
+            page_idx >= typed_index->max_values().size()) {
+            return false; // no min/max entry for this page
+        }
+        const std::string min( // NOTE(review): assumes ptr addresses >= type_length bytes
+                reinterpret_cast<const char*>(typed_index->min_values()[page_idx].ptr),
+                type_length);
+        const std::string max( // NOTE(review): same length assumption as for min
+                reinterpret_cast<const char*>(typed_index->max_values()[page_idx].ptr),
+                type_length);
+        if (!set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY,
+                                      StringRef(min.data(), min.size()),
+                                      &page_statistics->min_value, timezone) ||
+            !set_decoded_binary_field(column_schema, DecodedValueKind::FIXED_BINARY,
+                                      StringRef(max.data(), max.size()),
+                                      &page_statistics->max_value, timezone)) {
+            return false; // a bound could not be converted
+        }
+        page_statistics->has_min_max = true;
+        return true;
+    }
+    default:
+        return false; // not a binary physical type
+    }
+}
+
+bool set_page_min_max(const std::shared_ptr<::parquet::ColumnIndex>& column_index,
+                      const ParquetColumnSchema& column_schema, size_t page_idx,
+                      ParquetColumnStatistics* page_statistics, const cctz::time_zone* timezone) {
+    DORIS_CHECK(column_schema.type != nullptr); // NOTE(review): descriptor is not checked here
+    switch (column_schema.descriptor->physical_type()) { // pick the typed column index
+    case ::parquet::Type::BOOLEAN:
+        return set_page_decoded_min_max<::parquet::BooleanType>(column_index, column_schema,
+                                                                page_idx, DecodedValueKind::BOOL,
+                                                                page_statistics, timezone);
+    case ::parquet::Type::INT32: // value kind comes from the column's type_descriptor
+        return set_page_decoded_min_max<::parquet::Int32Type>(
+                column_index, column_schema, page_idx,
+                decoded_value_kind(column_schema.type_descriptor), page_statistics, timezone);
+    case ::parquet::Type::INT64: // value kind comes from the column's type_descriptor
+        return set_page_decoded_min_max<::parquet::Int64Type>(
+                column_index, column_schema, page_idx,
+                decoded_value_kind(column_schema.type_descriptor), page_statistics, timezone);
+    case ::parquet::Type::FLOAT:
+        return set_page_decoded_min_max<::parquet::FloatType>(column_index, column_schema, page_idx,
+                                                              DecodedValueKind::FLOAT,
+                                                              page_statistics, timezone);
+    case ::parquet::Type::DOUBLE:
+        return set_page_decoded_min_max<::parquet::DoubleType>(column_index, column_schema,
+                                                               page_idx, DecodedValueKind::DOUBLE,
+                                                               page_statistics, timezone);
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        return set_page_string_min_max(column_index, column_schema, page_idx, page_statistics,
+                                       timezone);
+    default:
+        return false; // remaining physical types get no page min/max
+    }
+}
+
+bool build_page_statistics(const std::shared_ptr<::parquet::ColumnIndex>& column_index,
+                           const ParquetColumnSchema& column_schema, size_t page_idx,
+                           ParquetColumnStatistics* page_statistics,
+                           const cctz::time_zone* timezone) {
+    DORIS_CHECK(page_statistics != nullptr);
+    *page_statistics = ParquetColumnStatistics {};
+    // Builds one page's statistics from the column index; null counts must be available.
+    const auto& null_pages = column_index->null_pages();
+    if (!column_index->has_null_counts() || page_idx >= null_pages.size() ||
+        page_idx >= column_index->null_counts().size()) {
+        return false;
+    }
+    // null_pages[page_idx] is treated as "this page has no non-null value".
+    page_statistics->has_null_count = true;
+    page_statistics->has_null = column_index->null_counts()[page_idx] > 0;
+    page_statistics->has_not_null = !null_pages[page_idx];
+    if (!page_statistics->has_not_null) {
+        return true; // no non-null values, so there is no min/max to decode
+    }
+    return set_page_min_max(column_index, column_schema, page_idx, page_statistics, timezone);
+}
+
+// Intersects two range lists with a two-pointer sweep. The sweep assumes both inputs are sorted
+// by start and non-overlapping; overlapping pieces are emitted in ascending order.
+std::vector<RowRange> intersect_ranges(const std::vector<RowRange>& left,
+                                       const std::vector<RowRange>& right) {
+    std::vector<RowRange> result;
+    for (size_t li = 0, ri = 0; li < left.size() && ri < right.size();) {
+        const int64_t left_end = left[li].start + left[li].length;
+        const int64_t right_end = right[ri].start + right[ri].length;
+        const int64_t start = std::max<int64_t>(left[li].start, right[ri].start);
+        const int64_t end = std::min(left_end, right_end);
+        // Only a non-empty overlap contributes a range.
+        if (start < end) {
+            result.push_back(RowRange {start, end - start});
+        }
+        // Advance whichever side ends first; on a tie the right side moves.
+        if (left_end < right_end) {
+            ++li;
+        } else {
+            ++ri;
+        }
+    }
+    return result;
+}
+
+int64_t count_range_rows(const std::vector<RowRange>& ranges) { // sum of all range lengths
+    int64_t total_rows = 0;
+    for (const RowRange& row_range : ranges) {
+        total_rows += row_range.length;
+    }
+    return total_rows;
+}
+
+// Row range covered by one page: from its first row up to the next page's first row.
+RowRange page_row_range(const ::parquet::OffsetIndex& offset_index, size_t page_idx,
+                        int64_t row_group_rows) {
+    const auto& locations = offset_index.page_locations();
+    const bool is_last_page = page_idx + 1 == locations.size(); // ends at the row group end
+    const int64_t start = locations[page_idx].first_row_index;
+    const int64_t end = is_last_page ? row_group_rows : locations[page_idx + 1].first_row_index;
+    DORIS_CHECK(start >= 0);
+    DORIS_CHECK(end >= start);
+    DORIS_CHECK(end <= row_group_rows);
+    return RowRange {start, end - start};
+}
+
+// Appends a non-empty range, merging it into the last range when the two are contiguous.
+void append_row_range(const RowRange& range, std::vector<RowRange>* ranges) {
+    if (range.length == 0) {
+        return;
+    }
+    const bool adjacent =
+            !ranges->empty() && ranges->back().start + ranges->back().length == range.start;
+    if (adjacent) {
+        ranges->back().length += range.length;
+    } else {
+        ranges->push_back(range);
+    }
+}
+
+bool select_ranges_for_filter(const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group,
+                              const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                              const format::FileColumnPredicateFilter& column_filter,
+                              int64_t row_group_rows, std::vector<RowRange>* ranges,
+                              const cctz::time_zone* timezone) { // false: filter not evaluable
+    if (column_filter.predicates.empty()) {
+        return false;
+    }
+    const auto* column_schema = resolve_predicate_leaf_schema(file_schema, column_filter);
+    if (column_schema == nullptr || column_schema->descriptor == nullptr) {
+        return false;
+    }
+    // Load both page indexes of the leaf; a read failure just disables page pruning here.
+    std::shared_ptr<::parquet::ColumnIndex> column_index;
+    std::shared_ptr<::parquet::OffsetIndex> offset_index;
+    try {
+        column_index = row_group->GetColumnIndex(column_schema->leaf_column_id);
+        offset_index = row_group->GetOffsetIndex(column_schema->leaf_column_id);
+    } catch (const ::parquet::ParquetException&) {
+        return false;
+    } catch (const std::exception&) {
+        return false;
+    }
+    if (column_index == nullptr || offset_index == nullptr ||
+        column_index->null_pages().size() != offset_index->page_locations().size()) {
+        return false; // indexes missing or describing different page counts
+    }
+    // Keep the row range of every page whose statistics do not rule the filter out.
+    ranges->clear();
+    const auto page_count = offset_index->page_locations().size();
+    for (size_t page_idx = 0; page_idx < page_count; ++page_idx) {
+        ParquetColumnStatistics page_statistics;
+        if (!build_page_statistics(column_index, *column_schema, page_idx, &page_statistics,
+                                   timezone)) {
+            ranges->clear();
+            return false; // one unusable page disables pruning for this filter
+        }
+        const RowRange row_range = page_row_range(*offset_index, page_idx, row_group_rows);
+        if (check_statistics(column_filter, page_statistics)) {
+            continue; // page excluded by its statistics
+        }
+        append_row_range(row_range, ranges);
+    }
+    return true;
+}
+
+// Returns whether `range` overlaps any entry of `ranges`; the early exit assumes `ranges` is
+// sorted by start.
+bool ranges_intersect(const std::vector<RowRange>& ranges, const RowRange& range) {
+    const int64_t range_end = range.start + range.length;
+    for (const auto& selected : ranges) {
+        const int64_t selected_end = selected.start + selected.length;
+        if (selected_end <= range.start) {
+            continue;
+        }
+        return selected.start < range_end; // first candidate decides: later ones start later
+    }
+    return false;
+}
+
+// Depth-first walk that appends every primitive leaf under column_schema kept by projection.
+void collect_leaf_schemas(const ParquetColumnSchema& column_schema,
+                          const format::LocalColumnIndex* projection,
+                          std::vector<const ParquetColumnSchema*>* leaf_schemas) {
+    if (column_schema.kind == ParquetColumnSchemaKind::PRIMITIVE) {
+        leaf_schemas->push_back(&column_schema);
+        return;
+    }
+    for (const auto& child : column_schema.children) {
+        if (format::is_child_projected(projection, child->local_id)) {
+            const auto* child_projection =
+                    format::find_child_projection(projection, child->local_id);
+            collect_leaf_schemas(*child, child_projection, leaf_schemas);
+        }
+    }
+}
+
+void collect_request_leaf_schemas(
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request,
+        std::vector<const ParquetColumnSchema*>* leaf_schemas) { // appends each leaf once
+    std::set<int> seen_leaf_ids; // leaf_column_ids already appended
+    auto collect_projection = [&](const format::LocalColumnIndex& projection) {
+        const int32_t local_id = projection.local_id();
+        if (local_id < 0 || local_id >= static_cast<int32_t>(file_schema.size())) {
+            return; // local_id does not index file_schema
+        }
+        std::vector<const ParquetColumnSchema*> projection_leaf_schemas;
+        collect_leaf_schemas(*file_schema[local_id], &projection, &projection_leaf_schemas);
+        for (const auto* leaf_schema : projection_leaf_schemas) {
+            DORIS_CHECK(leaf_schema != nullptr);
+            if (seen_leaf_ids.insert(leaf_schema->leaf_column_id).second) {
+                leaf_schemas->push_back(leaf_schema);
+            }
+        }
+    };
+    for (const auto& projection : request.predicate_columns) { // predicate columns first
+        collect_projection(projection);
+    }
+    for (const auto& projection : request.non_predicate_columns) {
+        collect_projection(projection);
+    }
+    for (const auto& column_filter : request.column_predicate_filters) { // then filter leaves
+        const auto* leaf_schema = resolve_predicate_leaf_schema(file_schema, column_filter);
+        if (leaf_schema == nullptr) {
+            continue; // filter does not resolve to a leaf
+        }
+        if (seen_leaf_ids.insert(leaf_schema->leaf_column_id).second) {
+            leaf_schemas->push_back(leaf_schema);
+        }
+    }
+}
+
+bool build_page_skip_plan_for_leaf(
+        const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group,
+        const ParquetColumnSchema& column_schema, const std::vector<RowRange>& selected_ranges,
+        int64_t row_group_rows, ParquetPageSkipPlan* page_skip_plan) { // false: no plan built
+    DORIS_CHECK(page_skip_plan != nullptr);
+    *page_skip_plan = ParquetPageSkipPlan {};
+    // OffsetIndex first_row_index is row-based only for non-repeated leaves. LIST/MAP/repeated
+    // leaves need repetition-level-aware range mapping and are intentionally left out for now.
+    if (column_schema.kind != ParquetColumnSchemaKind::PRIMITIVE ||
+        column_schema.descriptor == nullptr || column_schema.leaf_column_id < 0 ||
+        column_schema.descriptor->max_repetition_level() != 0) {
+        return false;
+    }
+    // Without a readable offset index no skip plan can be built for this leaf.
+    std::shared_ptr<::parquet::OffsetIndex> offset_index;
+    try {
+        offset_index = row_group->GetOffsetIndex(column_schema.leaf_column_id);
+    } catch (const ::parquet::ParquetException&) {
+        return false;
+    } catch (const std::exception&) {
+        return false;
+    }
+    if (offset_index == nullptr) {
+        return false;
+    }
+    // Mark every non-empty page that does not overlap the selected ranges as skipped.
+    const auto page_count = offset_index->page_locations().size();
+    page_skip_plan->leaf_column_id = column_schema.leaf_column_id;
+    page_skip_plan->skipped_pages.resize(page_count);
+    page_skip_plan->skipped_page_compressed_sizes.resize(page_count);
+    const auto& page_locations = offset_index->page_locations();
+    for (size_t page_idx = 0; page_idx < page_count; ++page_idx) {
+        const RowRange row_range = page_row_range(*offset_index, page_idx, row_group_rows);
+        if (row_range.length == 0 || ranges_intersect(selected_ranges, row_range)) {
+            continue; // empty page, or page overlapping a selected range
+        }
+        page_skip_plan->skipped_pages[page_idx] = 1;
+        page_skip_plan->skipped_page_compressed_sizes[page_idx] =
+                page_locations[page_idx].compressed_page_size;
+        append_row_range(row_range, &page_skip_plan->skipped_ranges);
+    }
+    if (page_skip_plan->empty()) {
+        *page_skip_plan = ParquetPageSkipPlan {};
+        return false; // plan reported empty: hand back a reset plan
+    }
+    return true;
+}
+
+void build_page_skip_plans(const std::shared_ptr<::parquet::RowGroupPageIndexReader>& row_group,
+                           const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+                           const format::FileScanRequest& request,
+                           const std::vector<RowRange>& selected_ranges, int64_t row_group_rows,
+                           std::map<int, ParquetPageSkipPlan>* page_skip_plans) {
+    DORIS_CHECK(page_skip_plans != nullptr);
+    page_skip_plans->clear(); // output is rebuilt from scratch, keyed by leaf_column_id
+    std::vector<const ParquetColumnSchema*> leaf_schemas;
+    collect_request_leaf_schemas(file_schema, request, &leaf_schemas);
+    for (const auto* leaf_schema : leaf_schemas) {
+        DORIS_CHECK(leaf_schema != nullptr);
+        ParquetPageSkipPlan page_skip_plan;
+        if (build_page_skip_plan_for_leaf(row_group, *leaf_schema, selected_ranges, row_group_rows,
+                                          &page_skip_plan)) { // leaves without a plan are omitted
+            page_skip_plans->emplace(page_skip_plan.leaf_column_id, std::move(page_skip_plan));
+        }
+    }
+}
+
+} // namespace
+
+Status select_row_group_ranges_by_page_index(
+        ::parquet::ParquetFileReader* file_reader,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, int row_group_idx, int64_t row_group_rows,
+        std::vector<RowRange>* selected_ranges, std::map<int, ParquetPageSkipPlan>* page_skip_plans,
+        ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone) {
+    int64_t page_index_filter_time_sink = 0;
+    SCOPED_RAW_TIMER(pruning_stats == nullptr ? &page_index_filter_time_sink
+                                              : &pruning_stats->page_index_filter_time);
+    DORIS_CHECK(selected_ranges != nullptr);
+    selected_ranges->clear();
+    if (page_skip_plans != nullptr) {
+        page_skip_plans->clear();
+    }
+    if (row_group_rows <= 0) {
+        return Status::OK(); // no rows: selected_ranges stays empty
+    }
+    selected_ranges->push_back(RowRange {0, row_group_rows}); // default: select every row
+    if (!config::enable_parquet_page_index || request.column_predicate_filters.empty() ||
+        file_reader == nullptr) {
+        return Status::OK(); // page index pruning disabled or not applicable
+    }
+    // Page index loading is best-effort: any failure keeps the whole row group selected.
+    std::shared_ptr<::parquet::PageIndexReader> page_index_reader;
+    std::shared_ptr<::parquet::RowGroupPageIndexReader> row_group_index_reader;
+    try {
+        if (pruning_stats != nullptr) {
+            ++pruning_stats->page_index_read_calls;
+        }
+        {
+            int64_t read_page_index_time_sink = 0;
+            SCOPED_RAW_TIMER(pruning_stats == nullptr ? &read_page_index_time_sink
+                                                      : &pruning_stats->read_page_index_time);
+            page_index_reader = file_reader->GetPageIndexReader();
+            if (page_index_reader == nullptr) {
+                return Status::OK();
+            }
+            row_group_index_reader = page_index_reader->RowGroup(row_group_idx);
+        }
+    } catch (const ::parquet::ParquetException&) {
+        return Status::OK();
+    } catch (const std::exception&) {
+        return Status::OK();
+    }
+    if (row_group_index_reader == nullptr) {
+        return Status::OK();
+    }
+    // Intersect the surviving ranges of every filter that the page index can evaluate.
+    for (const auto& column_filter : request.column_predicate_filters) {
+        std::vector<RowRange> filter_ranges;
+        if (!select_ranges_for_filter(row_group_index_reader, file_schema, column_filter,
+                                      row_group_rows, &filter_ranges, timezone)) {
+            continue; // filter not evaluable here; it restricts nothing
+        }
+        *selected_ranges = intersect_ranges(*selected_ranges, filter_ranges);
+        if (selected_ranges->empty()) {
+            if (page_skip_plans != nullptr) {
+                page_skip_plans->clear();
+            }
+            if (pruning_stats != nullptr) {
+                pruning_stats->filtered_page_rows += row_group_rows;
+                ++pruning_stats->filtered_row_groups_by_page_index;
+            }
+            return Status::OK(); // every row of the row group was pruned
+        }
+    }
+    if (page_skip_plans != nullptr) {
+        build_page_skip_plans(row_group_index_reader, file_schema, request, *selected_ranges,
+                              row_group_rows, page_skip_plans);
+    }
+    if (pruning_stats != nullptr) {
+        const int64_t selected_rows = count_range_rows(*selected_ranges);
+        DORIS_CHECK(selected_rows <= row_group_rows);
+        pruning_stats->filtered_page_rows += row_group_rows - selected_rows;
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_statistics.h b/be/src/format_v2/parquet/parquet_statistics.h
new file mode 100644
index 00000000000000..3d4b9d3579185d
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_statistics.h
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <vector>
+
+#include "common/status.h"
+#include "core/field.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/selection_vector.h"
+
+namespace parquet {
+class BloomFilter;
+class FileMetaData;
+class ParquetFileReader;
+class Statistics;
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris {
+class ColumnPredicate;
+} // namespace doris
+
+namespace doris::format::parquet {
+
+struct ParquetColumnSchema;
+
+// ============================================================================
+// Pruning counters/timers and the decoded statistics that predicates are checked against.
+
+struct ParquetPruningStats {                         // counters/timers reported by pruning
+    int64_t total_row_groups = 0;                    // total row groups in the file
+    int64_t selected_row_groups = 0;                 // row groups selected after pruning
+    int64_t filtered_row_groups_by_statistics = 0;   // row groups pruned by min/max statistics
+    int64_t filtered_row_groups_by_dictionary = 0;   // row groups pruned by dictionary
+    int64_t filtered_row_groups_by_bloom_filter = 0; // row groups pruned by bloom filter
+    int64_t filtered_row_groups_by_page_index = 0;   // row groups fully pruned by page index
+    int64_t filtered_group_rows = 0;                 // rows in groups pruned by stats/dict/bloom
+    int64_t filtered_page_rows = 0;                  // rows dropped by page index pruning
+    int64_t selected_row_ranges = 0;                 // selected row range count
+    int64_t page_index_read_calls = 0;               // page index read attempts
+    int64_t bloom_filter_read_time = 0;              // bloom filter read time (ns)
+    int64_t row_group_filter_time = 0;               // row-group pruning time (ns)
+    int64_t page_index_filter_time = 0;              // page-index pruning time (ns)
+    int64_t read_page_index_time = 0;                // page-index read time (ns)
+};
+
+struct ParquetColumnStatistics { // statistics of one column chunk or one data page
+    Field min_value;             // minimum value converted to Doris type
+    Field max_value;             // maximum value converted to Doris type
+    bool has_null = false;       // whether NULL exists
+    bool has_not_null = false;   // whether non-NULL values exist
+    bool has_null_count = false; // whether null_count is valid
+    bool has_min_max = false;    // whether min/max is valid after conversion
+
+    bool has_any_statistics() const { return has_null_count || has_min_max; }
+};
+
+// ============================================================================
+// Row-group pruning order, tried per column predicate filter:
+//     statistics(TransformColumnStatistics + check_statistics)
+//     -> dictionary(read_dictionary_words + predicate::evaluate_and)
+//     -> bloom filter(bloom_filter_prune_reason)
+// ============================================================================
+struct ParquetStatisticsUtils { // static helpers only; no instance state
+    static ParquetColumnStatistics TransformColumnStatistics(
+            const ParquetColumnSchema& column_schema,
+            const std::shared_ptr<::parquet::Statistics>& statistics, // Parquet-side input
+            const cctz::time_zone* timezone = nullptr);
+
+    static bool BloomFilterExcludes(const ParquetColumnSchema& column_schema,
+                                    const format::FileColumnPredicateFilter& column_filter,
+                                    const ::parquet::BloomFilter& bloom_filter);
+};
+
+Status select_row_groups_by_statistics( // row-group level pruning; fills selected_row_groups
+        const ::parquet::FileMetaData& metadata, ::parquet::ParquetFileReader* file_reader,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, const std::vector<int>* candidate_row_groups,
+        std::vector<int>* selected_row_groups, bool enable_bloom_filter,
+        ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone = nullptr);
+
+Status select_row_group_ranges_by_page_index( // page level pruning inside one row group
+        ::parquet::ParquetFileReader* file_reader,
+        const std::vector<std::unique_ptr<ParquetColumnSchema>>& file_schema,
+        const format::FileScanRequest& request, int row_group_idx, int64_t row_group_rows,
+        std::vector<RowRange>* selected_ranges, std::map<int, ParquetPageSkipPlan>* page_skip_plans,
+        ParquetPruningStats* pruning_stats, const cctz::time_zone* timezone = nullptr);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_type.cpp b/be/src/format_v2/parquet/parquet_type.cpp
new file mode 100644
index 00000000000000..d35181d0397178
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_type.cpp
@@ -0,0 +1,358 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_type.h"
+
+#include <parquet/api/schema.h>
+
+#include <memory>
+#include <string>
+
+#include "core/data_type/data_type_factory.hpp"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/primitive_type.h"
+
+namespace doris::format::parquet {
+namespace {
+
+// Shorthand for DataTypeFactory::create_data_type(); `precision` and `scale` default to 0 for
+// callers that do not pass them.
+DataTypePtr create_type(PrimitiveType type, bool nullable, int precision = 0, int scale = 0) {
+    auto& factory = DataTypeFactory::instance();
+    return factory.create_data_type(type, nullable, precision, scale);
+}
+
+// Chooses the Doris decimal carrier: DECIMAL256 once the precision exceeds 38 digits,
+// DECIMAL128I for everything else.
+PrimitiveType decimal_primitive_type(int precision) {
+    if (precision > 38) {
+        return TYPE_DECIMAL256;
+    }
+    return TYPE_DECIMAL128I;
+}
+
+// Flags `result` as a decimal with the given precision/scale and records which physical carrier
+// holds the value: INT32, INT64, or a (fixed-length) byte array. Any other physical type sets
+// extra_type_info to NONE.
+void mark_decimal(const ::parquet::ColumnDescriptor* column, int precision, int scale,
+                  ParquetTypeDescriptor* result) {
+    const auto carrier = column->physical_type();
+    ParquetExtraTypeInfo carrier_info = ParquetExtraTypeInfo::NONE;
+    if (carrier == ::parquet::Type::INT32) {
+        carrier_info = ParquetExtraTypeInfo::DECIMAL_INT32;
+    } else if (carrier == ::parquet::Type::INT64) {
+        carrier_info = ParquetExtraTypeInfo::DECIMAL_INT64;
+    } else if (carrier == ::parquet::Type::BYTE_ARRAY ||
+               carrier == ::parquet::Type::FIXED_LEN_BYTE_ARRAY) {
+        carrier_info = ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY;
+    }
+
+    result->is_decimal = true;
+    result->decimal_precision = precision;
+    result->decimal_scale = scale;
+    result->extra_type_info = carrier_info;
+}
+
+// Records the annotated integer width and signedness on `result`.
+void mark_integer(int bit_width, bool is_signed, ParquetTypeDescriptor* result) {
+    const bool is_unsigned = !is_signed;
+    result->is_unsigned_integer = is_unsigned;
+    result->integer_bit_width = bit_width;
+}
+
+// Maps the legacy Parquet ConvertedType annotation of `column` to a Doris type.
+// Alongside the returned type, the matching side information (decimal, timestamp or integer
+// fields) is written into `result`. Returns nullptr when the annotation is NONE or not handled;
+// for TIME_MILLIS/TIME_MICROS it additionally sets `result->unsupported_reason`.
+DataTypePtr converted_type_to_doris_type(const ::parquet::ColumnDescriptor* column,
+                                         ParquetTypeDescriptor* result) {
+    const bool nullable = column->max_definition_level() > 0;
+
+    // Legacy timestamp annotations are recorded as UTC-adjusted; `scale` is the DATETIMEV2 scale.
+    const auto utc_timestamp = [&](ParquetTimeUnit unit, ParquetExtraTypeInfo unit_info,
+                                   int scale) {
+        result->is_timestamp = true;
+        result->timestamp_is_adjusted_to_utc = true;
+        result->time_unit = unit;
+        result->extra_type_info = unit_info;
+        return create_type(TYPE_DATETIMEV2, nullable, 0, scale);
+    };
+    // Records width/signedness and builds the Doris integer type chosen by the caller.
+    const auto integer = [&](int bit_width, bool is_signed, PrimitiveType doris_type) {
+        mark_integer(bit_width, is_signed, result);
+        return create_type(doris_type, nullable);
+    };
+
+    switch (column->converted_type()) {
+    case ::parquet::ConvertedType::UTF8:
+    case ::parquet::ConvertedType::ENUM:
+    case ::parquet::ConvertedType::JSON:
+    case ::parquet::ConvertedType::BSON:
+        return create_type(TYPE_STRING, nullable);
+    case ::parquet::ConvertedType::DECIMAL: {
+        const int precision = column->type_precision();
+        const int scale = column->type_scale();
+        mark_decimal(column, precision, scale, result);
+        return create_type(decimal_primitive_type(precision), nullable, precision, scale);
+    }
+    case ::parquet::ConvertedType::DATE:
+        return create_type(TYPE_DATEV2, nullable);
+    // Both legacy TIME annotations are rejected with the same reason.
+    case ::parquet::ConvertedType::TIME_MILLIS:
+    case ::parquet::ConvertedType::TIME_MICROS:
+        result->unsupported_reason = "Parquet TIME with isAdjustedToUTC=true is not supported";
+        return nullptr;
+    case ::parquet::ConvertedType::TIMESTAMP_MILLIS:
+        return utc_timestamp(ParquetTimeUnit::MILLIS, ParquetExtraTypeInfo::UNIT_MS, 3);
+    case ::parquet::ConvertedType::TIMESTAMP_MICROS:
+        return utc_timestamp(ParquetTimeUnit::MICROS, ParquetExtraTypeInfo::UNIT_MICROS, 6);
+    // Parquet stores signed and unsigned integer logical annotations on signed physical carriers:
+    // INT_8/UINT_8/INT_16/UINT_16/INT_32/UINT_32 use physical INT32, and
+    // INT_64/UINT_64 use physical INT64. Doris maps unsigned integers to the next wider
+    // signed type so all values in the unsigned range can be represented.
+    case ::parquet::ConvertedType::INT_8:
+        return integer(8, true, TYPE_TINYINT);
+    case ::parquet::ConvertedType::UINT_8:
+        return integer(8, false, TYPE_SMALLINT);
+    case ::parquet::ConvertedType::INT_16:
+        return integer(16, true, TYPE_SMALLINT);
+    case ::parquet::ConvertedType::UINT_16:
+        return integer(16, false, TYPE_INT);
+    case ::parquet::ConvertedType::INT_32:
+        return integer(32, true, TYPE_INT);
+    case ::parquet::ConvertedType::UINT_32:
+        return integer(32, false, TYPE_BIGINT);
+    case ::parquet::ConvertedType::INT_64:
+        return integer(64, true, TYPE_BIGINT);
+    case ::parquet::ConvertedType::UINT_64:
+        return integer(64, false, TYPE_LARGEINT);
+    case ::parquet::ConvertedType::NONE:
+    default:
+        return nullptr;
+    }
+}
+
+// Maps the Parquet LogicalType annotation of `column` to a Doris type and records the matching
+// side information in `result`. Returns nullptr when the column has no usable logical type or
+// the annotation is not handled here; a UTC-adjusted TIME additionally sets
+// `result->unsupported_reason`.
+DataTypePtr logical_type_to_doris_type(const ::parquet::ColumnDescriptor* column,
+                                       ParquetTypeDescriptor* result) {
+    const auto& logical_type = column->logical_type();
+    const bool has_annotation =
+            logical_type != nullptr && logical_type->is_valid() && !logical_type->is_none();
+    if (!has_annotation) {
+        return nullptr;
+    }
+    const bool nullable = column->max_definition_level() > 0;
+
+    const bool is_text = logical_type->is_string() || logical_type->is_enum() ||
+                         logical_type->is_JSON() || logical_type->is_BSON() ||
+                         logical_type->is_UUID();
+    if (is_text) {
+        return create_type(TYPE_STRING, nullable);
+    }
+
+    if (logical_type->is_decimal()) {
+        const auto& decimal_type = static_cast<const ::parquet::DecimalLogicalType&>(*logical_type);
+        const int precision = decimal_type.precision();
+        const int scale = decimal_type.scale();
+        mark_decimal(column, precision, scale, result);
+        return create_type(decimal_primitive_type(precision), nullable, precision, scale);
+    }
+
+    if (logical_type->is_date()) {
+        return create_type(TYPE_DATEV2, nullable);
+    }
+
+    if (logical_type->is_time()) {
+        const auto& time_type = static_cast<const ::parquet::TimeLogicalType&>(*logical_type);
+        if (time_type.is_adjusted_to_utc()) {
+            result->unsupported_reason = "Parquet TIME with isAdjustedToUTC=true is not supported";
+            return nullptr;
+        }
+        // Only millisecond and microsecond TIME values are mapped; other units yield nullptr.
+        switch (time_type.time_unit()) {
+        case ::parquet::LogicalType::TimeUnit::MILLIS:
+            result->time_unit = ParquetTimeUnit::MILLIS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_MS;
+            return create_type(TYPE_TIMEV2, nullable, 0, 3);
+        case ::parquet::LogicalType::TimeUnit::MICROS:
+            result->time_unit = ParquetTimeUnit::MICROS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_MICROS;
+            return create_type(TYPE_TIMEV2, nullable, 0, 6);
+        default:
+            return nullptr;
+        }
+    }
+
+    if (logical_type->is_timestamp()) {
+        const auto& timestamp_type =
+                static_cast<const ::parquet::TimestampLogicalType&>(*logical_type);
+        int scale = 0;
+        switch (timestamp_type.time_unit()) {
+        case ::parquet::LogicalType::TimeUnit::MILLIS:
+            scale = 3;
+            result->time_unit = ParquetTimeUnit::MILLIS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_MS;
+            break;
+        case ::parquet::LogicalType::TimeUnit::MICROS:
+            scale = 6;
+            result->time_unit = ParquetTimeUnit::MICROS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_MICROS;
+            break;
+        case ::parquet::LogicalType::TimeUnit::NANOS:
+            // Nanosecond timestamps are still exposed with scale 6.
+            scale = 6;
+            result->time_unit = ParquetTimeUnit::NANOS;
+            result->extra_type_info = ParquetExtraTypeInfo::UNIT_NS;
+            break;
+        default:
+            return nullptr;
+        }
+        result->is_timestamp = true;
+        result->timestamp_is_adjusted_to_utc = timestamp_type.is_adjusted_to_utc();
+        return create_type(TYPE_DATETIMEV2, nullable, 0, scale);
+    }
+
+    if (logical_type->is_int()) {
+        const auto& int_type = static_cast<const ::parquet::IntLogicalType&>(*logical_type);
+        const int bit_width = int_type.bit_width();
+        const bool is_signed = int_type.is_signed();
+        mark_integer(bit_width, is_signed, result);
+        // Unsigned widths are widened to the next signed Doris integer type.
+        switch (bit_width) {
+        case 8:
+            return create_type(is_signed ? TYPE_TINYINT : TYPE_SMALLINT, nullable);
+        case 16:
+            return create_type(is_signed ? TYPE_SMALLINT : TYPE_INT, nullable);
+        case 32:
+            return create_type(is_signed ? TYPE_INT : TYPE_BIGINT, nullable);
+        case 64:
+            return create_type(is_signed ? TYPE_BIGINT : TYPE_LARGEINT, nullable);
+        default:
+            return nullptr;
+        }
+    }
+
+    if (logical_type->is_float16()) {
+        // FLOAT16 is only accepted on a 2-byte FIXED_LEN_BYTE_ARRAY carrier.
+        const bool is_two_byte_fixed =
+                column->physical_type() == ::parquet::Type::FIXED_LEN_BYTE_ARRAY &&
+                column->type_length() == 2;
+        if (!is_two_byte_fixed) {
+            return nullptr;
+        }
+        result->extra_type_info = ParquetExtraTypeInfo::FLOAT16;
+        return create_type(TYPE_FLOAT, nullable);
+    }
+
+    return nullptr;
+}
+
+// Fallback mapping that looks only at the Parquet physical type. The result is wrapped with
+// make_nullable() when the column's max definition level is above 0; unknown physical types
+// yield nullptr.
+DataTypePtr physical_type_to_doris_type(const ::parquet::ColumnDescriptor* column) {
+    const bool nullable = column->max_definition_level() > 0;
+    const auto with_nullability = [nullable](DataTypePtr base) -> DataTypePtr {
+        return nullable ? make_nullable(base) : base;
+    };
+
+    switch (column->physical_type()) {
+    case ::parquet::Type::BOOLEAN:
+        return with_nullability(std::make_shared<DataTypeBool>());
+    case ::parquet::Type::INT32:
+        return with_nullability(std::make_shared<DataTypeInt32>());
+    case ::parquet::Type::INT64:
+        return with_nullability(std::make_shared<DataTypeInt64>());
+    case ::parquet::Type::FLOAT:
+        return with_nullability(std::make_shared<DataTypeFloat32>());
+    case ::parquet::Type::DOUBLE:
+        return with_nullability(std::make_shared<DataTypeFloat64>());
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        return with_nullability(std::make_shared<DataTypeString>());
+    case ::parquet::Type::INT96:
+        // INT96 is mapped to DATETIMEV2 with scale 6 through the factory helper.
+        return with_nullability(create_type(TYPE_DATETIMEV2, nullable, 0, 6));
+    default:
+        return nullptr;
+    }
+}
+
+bool record_reader_physical_type_supported(::parquet::Type::type physical_type) {
+    switch (physical_type) {
+    case ::parquet::Type::BOOLEAN:
+    case ::parquet::Type::INT32:
+    case ::parquet::Type::INT64:
+    case ::parquet::Type::INT96:
+    case ::parquet::Type::FLOAT:
+    case ::parquet::Type::DOUBLE:
+    case ::parquet::Type::BYTE_ARRAY:
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        return true;
+    default:
+        return false;
+    }
+}
+
+} // namespace
+
+// Returns the dotted schema path of `column`, the plain column name when no path object is
+// available, and an empty string for a null descriptor.
+std::string parquet_column_name(const ::parquet::ColumnDescriptor* column) {
+    if (column == nullptr) {
+        return std::string();
+    }
+    const auto column_path = column->path();
+    if (column_path == nullptr) {
+        return column->name();
+    }
+    return column_path->ToDotString();
+}
+
+// Resolves the Doris type for a Parquet leaf column. The logical annotation is tried first,
+// then the legacy converted annotation, and finally the bare physical type. As soon as one of
+// the annotation steps reports an unsupported_reason, resolution stops with a null doris_type
+// and supports_record_reader cleared. A null `column` yields a default-constructed descriptor.
+ParquetTypeDescriptor resolve_parquet_type(const ::parquet::ColumnDescriptor* column) {
+    ParquetTypeDescriptor result;
+    if (column == nullptr) {
+        return result;
+    }
+
+    result.physical_type = column->physical_type();
+    result.converted_type = column->converted_type();
+    result.fixed_length = column->type_length();
+
+    // An annotation step "rejects" the column when it returns null and left a reason behind.
+    const auto rejected = [&result](const DataTypePtr& resolved) {
+        return resolved == nullptr && !result.unsupported_reason.empty();
+    };
+
+    DataTypePtr resolved = logical_type_to_doris_type(column, &result);
+    bool unsupported = rejected(resolved);
+    if (resolved == nullptr && !unsupported) {
+        resolved = converted_type_to_doris_type(column, &result);
+        unsupported = rejected(resolved);
+    }
+    if (resolved == nullptr && !unsupported) {
+        resolved = physical_type_to_doris_type(column);
+        // Un-annotated INT96 is tagged as an Impala-style timestamp.
+        if (result.physical_type == ::parquet::Type::INT96) {
+            result.extra_type_info = ParquetExtraTypeInfo::IMPALA_TIMESTAMP;
+        }
+    }
+    result.doris_type = resolved;
+    if (unsupported) {
+        result.supports_record_reader = false;
+    }
+
+    const bool is_binary_carrier = result.physical_type == ::parquet::Type::BYTE_ARRAY ||
+                                   result.physical_type == ::parquet::Type::FIXED_LEN_BYTE_ARRAY;
+    result.is_string_like = !result.is_decimal &&
+                            result.extra_type_info != ParquetExtraTypeInfo::FLOAT16 &&
+                            is_binary_carrier;
+
+    if (!record_reader_physical_type_supported(result.physical_type)) {
+        result.supports_record_reader = false;
+    }
+    return result;
+}
+
+// Accessor for the supports_record_reader flag of a resolved descriptor.
+bool supports_record_reader(const ParquetTypeDescriptor& type_descriptor) {
+    const bool readable = type_descriptor.supports_record_reader;
+    return readable;
+}
+
+// Picks the DecodedValueKind for a column from its physical type. INT32/INT64 columns are
+// reported as UINT32/UINT64 only when annotated as unsigned with the full 32/64-bit width;
+// BYTE_ARRAY and any unlisted physical type are reported as BINARY.
+DecodedValueKind decoded_value_kind(const ParquetTypeDescriptor& type_descriptor) {
+    const auto is_unsigned_of_width = [&type_descriptor](int bits) {
+        return type_descriptor.is_unsigned_integer && type_descriptor.integer_bit_width == bits;
+    };
+
+    switch (type_descriptor.physical_type) {
+    case ::parquet::Type::BOOLEAN:
+        return DecodedValueKind::BOOL;
+    case ::parquet::Type::INT32:
+        return is_unsigned_of_width(32) ? DecodedValueKind::UINT32 : DecodedValueKind::INT32;
+    case ::parquet::Type::INT64:
+        return is_unsigned_of_width(64) ? DecodedValueKind::UINT64 : DecodedValueKind::INT64;
+    case ::parquet::Type::INT96:
+        return DecodedValueKind::INT96;
+    case ::parquet::Type::FLOAT:
+        return DecodedValueKind::FLOAT;
+    case ::parquet::Type::DOUBLE:
+        return DecodedValueKind::DOUBLE;
+    case ::parquet::Type::FIXED_LEN_BYTE_ARRAY:
+        return DecodedValueKind::FIXED_BINARY;
+    case ::parquet::Type::BYTE_ARRAY:
+    default:
+        return DecodedValueKind::BINARY;
+    }
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/parquet_type.h b/be/src/format_v2/parquet/parquet_type.h
new file mode 100644
index 00000000000000..5d21aae6bae092
--- /dev/null
+++ b/be/src/format_v2/parquet/parquet_type.h
@@ -0,0 +1,82 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <parquet/types.h>
+
+#include <string>
+
+#include "core/data_type/data_type.h"
+#include "core/data_type_serde/decoded_column_view.h"
+
+namespace parquet {
+class ColumnDescriptor;
+} // namespace parquet
+
+namespace doris::format::parquet {
+
+// ============================================================================
+// Secondary tag that resolve_parquet_type() attaches to a column in addition to its
+// physical type: the decimal carrier, the time unit, or a special value encoding.
+// ============================================================================
+
+enum class ParquetExtraTypeInfo {
+    NONE,               // no special encoding; read by physical type
+    DECIMAL_INT32,      // decimal carried by a physical INT32 column
+    DECIMAL_INT64,      // decimal carried by a physical INT64 column
+    DECIMAL_BYTE_ARRAY, // decimal stored as a variable/fixed-length big-endian byte array
+    UNIT_MS,            // time unit is milliseconds
+    UNIT_MICROS,        // time unit is microseconds
+    UNIT_NS,            // time unit is nanoseconds
+    IMPALA_TIMESTAMP,   // Impala-compatible timestamp encoded as INT96
+    FLOAT16,            // half-precision float (FIXED_LEN_BYTE_ARRAY(2) -> Float32)
+};
+
+// Unit recorded for TIME/TIMESTAMP annotations; UNKNOWN is the default of
+// ParquetTypeDescriptor::time_unit before any unit has been recorded.
+enum class ParquetTimeUnit {
+    UNKNOWN,
+    MILLIS,
+    MICROS,
+    NANOS,
+};
+
+// ============================================================================
+// Resolution result for one Parquet leaf column, filled in by resolve_parquet_type():
+// the Doris type that was chosen plus the side information recorded along the way.
+// ============================================================================
+struct ParquetTypeDescriptor {
+    // Doris type picked for the column; null when no mapping was found or the annotation
+    // was rejected (see unsupported_reason).
+    DataTypePtr doris_type;
+    // Secondary encoding/unit tag; NONE unless the resolver recorded one.
+    ParquetExtraTypeInfo extra_type_info = ParquetExtraTypeInfo::NONE;
+    // Unit recorded for TIME/TIMESTAMP annotations; UNKNOWN otherwise.
+    ParquetTimeUnit time_unit = ParquetTimeUnit::UNKNOWN;
+    // Physical and converted types copied from the column descriptor.
+    ::parquet::Type::type physical_type = ::parquet::Type::UNDEFINED;
+    ::parquet::ConvertedType::type converted_type = ::parquet::ConvertedType::UNDEFINED;
+    int integer_bit_width = -1;                // bit width for INT_8/16/32/64
+    int decimal_precision = -1;                // precision for DECIMAL(p,s)
+    int decimal_scale = -1;                    // scale for DECIMAL(p,s)
+    int fixed_length = -1;                     // fixed length for FIXED_LEN_BYTE_ARRAY
+    bool is_unsigned_integer = false;          // whether the integer is unsigned (UINT_8/16/32/64)
+    bool is_decimal = false;                   // whether this is a decimal type
+    bool is_timestamp = false;                 // whether this is a timestamp type
+    bool timestamp_is_adjusted_to_utc = false; // whether the timestamp is UTC-normalized
+    bool is_string_like = false;               // binary type that is neither decimal nor FLOAT16
+    bool supports_record_reader = true;        // whether Arrow RecordReader can read this type
+    std::string unsupported_reason; // non-empty when this Parquet logical type is unsupported
+};
+
+// Returns the dotted schema path of `column` (falling back to its name), or an empty string
+// when `column` is null.
+std::string parquet_column_name(const ::parquet::ColumnDescriptor* column);
+
+// Resolves the Doris type and side information for `column`, trying the logical annotation,
+// then the converted annotation, then the physical type. A null `column` yields a
+// default-constructed descriptor.
+ParquetTypeDescriptor resolve_parquet_type(const ::parquet::ColumnDescriptor* column);
+
+// Returns type_descriptor.supports_record_reader.
+bool supports_record_reader(const ParquetTypeDescriptor& type_descriptor);
+
+// Maps the descriptor's physical type (and unsigned 32/64-bit annotation) to a
+// DecodedValueKind.
+DecodedValueKind decoded_value_kind(const ParquetTypeDescriptor& type_descriptor);
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/column_reader.cpp b/be/src/format_v2/parquet/reader/column_reader.cpp
new file mode 100644
index 00000000000000..9b7577e5521ea8
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/column_reader.cpp
@@ -0,0 +1,625 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/column_reader.h"
+
+#include <arrow/memory_pool.h>
+#include <parquet/api/reader.h>
+#include <parquet/api/schema.h>
+#include <parquet/level_conversion.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <map>
+#include <memory>
+#include <ranges>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_struct.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/global_rowid_column_reader.h"
+#include "format_v2/parquet/reader/list_column_reader.h"
+#include "format_v2/parquet/reader/map_column_reader.h"
+#include "format_v2/parquet/reader/row_position_column_reader.h"
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+#include "format_v2/parquet/reader/struct_column_reader.h"
+#include "runtime/runtime_profile.h"
+
+namespace doris::format::parquet {
+namespace {
+
+// Data-page filter callback: counts data pages in the order the callback is invoked and asks
+// the page skip plan whether each one should be skipped, updating the skip counters of the
+// profile for every skipped page. The plan pointer must be non-null.
+class DataPageSkipFilter {
+public:
+    DataPageSkipFilter(const ParquetPageSkipPlan* page_skip_plan,
+                       ParquetPageSkipProfile page_skip_profile)
+            : _page_skip_plan(page_skip_plan), _page_skip_profile(page_skip_profile) {
+        DORIS_CHECK(_page_skip_plan != nullptr);
+    }
+
+    // Returns true when the page at the current ordinal must be skipped.
+    bool operator()(const ::parquet::DataPageStats&) {
+        // Arrow invokes this callback once for each DATA_PAGE/DATA_PAGE_V2 and never for
+        // dictionary pages, so this ordinal matches Parquet OffsetIndex page locations.
+        const size_t ordinal = _next_data_page_idx;
+        ++_next_data_page_idx;
+        if (_page_skip_plan->should_skip_page(ordinal)) {
+            record_skipped_page(ordinal);
+            return true;
+        }
+        return false;
+    }
+
+private:
+    // Bumps the skipped-page and skipped-byte counters; either counter may be absent.
+    void record_skipped_page(size_t ordinal) const {
+        if (_page_skip_profile.skipped_pages != nullptr) {
+            COUNTER_UPDATE(_page_skip_profile.skipped_pages, 1);
+        }
+        if (_page_skip_profile.skipped_bytes != nullptr) {
+            COUNTER_UPDATE(_page_skip_profile.skipped_bytes,
+                           _page_skip_plan->skipped_page_compressed_size(ordinal));
+        }
+    }
+
+    const ParquetPageSkipPlan* _page_skip_plan = nullptr;
+    ParquetPageSkipProfile _page_skip_profile;
+    size_t _next_data_page_idx = 0;
+};
+
+// Looks up the skip plan registered for `leaf_column_id`; returns nullptr when the map itself
+// is null or has no entry for that column.
+const ParquetPageSkipPlan* find_page_skip_plan(
+        const std::map<int, ParquetPageSkipPlan>* page_skip_plans, int leaf_column_id) {
+    if (page_skip_plans != nullptr) {
+        if (const auto entry = page_skip_plans->find(leaf_column_id);
+            entry != page_skip_plans->end()) {
+            return &entry->second;
+        }
+    }
+    return nullptr;
+}
+
+// Installs a DataPageSkipFilter on `page_reader` when a skip plan exists for the leaf column;
+// without a plan the page reader is left untouched. `page_reader` must be non-null.
+void install_data_page_filter(std::unique_ptr<::parquet::PageReader>& page_reader,
+                              const std::map<int, ParquetPageSkipPlan>* page_skip_plans,
+                              int leaf_column_id, ParquetPageSkipProfile page_skip_profile) {
+    DORIS_CHECK(page_reader != nullptr);
+    if (const ParquetPageSkipPlan* plan = find_page_skip_plan(page_skip_plans, leaf_column_id);
+        plan != nullptr) {
+        page_reader->set_data_page_filter(DataPageSkipFilter(plan, page_skip_profile));
+    }
+}
+
+// Decides whether a primitive leaf can be read through the nested scalar record reader.
+// Columns already flagged as record-reader capable are accepted directly. Otherwise only
+// un-annotated BOOLEAN/INT32/INT64/FLOAT/DOUBLE columns pass: anything with a special
+// encoding other than FLOAT16, a decimal, a timestamp, a string-like column, or a converted
+// type other than NONE/UNDEFINED is rejected.
+bool supports_nested_scalar_record_reader(const ParquetColumnSchema& column_schema) {
+    const auto& type_descriptor = column_schema.type_descriptor;
+    if (type_descriptor.supports_record_reader) {
+        return true;
+    }
+
+    const bool has_special_encoding =
+            type_descriptor.extra_type_info != ParquetExtraTypeInfo::NONE &&
+            type_descriptor.extra_type_info != ParquetExtraTypeInfo::FLOAT16;
+    if (has_special_encoding || type_descriptor.is_decimal || type_descriptor.is_timestamp ||
+        type_descriptor.is_string_like) {
+        return false;
+    }
+
+    const bool has_converted_annotation =
+            type_descriptor.converted_type != ::parquet::ConvertedType::NONE &&
+            type_descriptor.converted_type != ::parquet::ConvertedType::UNDEFINED;
+    if (has_converted_annotation) {
+        return false;
+    }
+
+    // Every path of the switch returns, so no trailing return statement is needed.
+    switch (type_descriptor.physical_type) {
+    case ::parquet::Type::BOOLEAN:
+    case ::parquet::Type::INT32:
+    case ::parquet::Type::INT64:
+    case ::parquet::Type::FLOAT:
+    case ::parquet::Type::DOUBLE:
+        return true;
+    default:
+        return false;
+    }
+}
+
+} // namespace
+
+// This implementation does not skip anything: it always reports NotSupported, including the
+// requested row count in the message.
+Status ParquetColumnReader::skip(int64_t rows) {
+    Status not_supported =
+            Status::NotSupported("Parquet column skip is not implemented, rows={}", rows);
+    return not_supported;
+}
+
+// Moves the nested build-level cursor forward over buffered level entries. Each iteration
+// consumes one entry; the scan continues only while this reader is or has a repeated child and
+// the consumed entry's repetition level is greater than `parent_repetition_level`. It also
+// ends when all written levels are consumed. The final position is stored back on the reader.
+void ParquetColumnReader::advance_nested_build_level_cursor_past_parent(
+        int16_t parent_repetition_level) {
+    int64_t cursor = nested_build_level_cursor();
+    const auto& rep_levels = nested_repetition_levels();
+    const int64_t levels_written = nested_levels_written();
+    bool keep_scanning = true;
+    while (keep_scanning && cursor < levels_written) {
+        const int16_t rep_level = rep_levels[cursor];
+        ++cursor;
+        keep_scanning = is_or_has_repeated_child() && rep_level > parent_repetition_level;
+    }
+    set_nested_build_level_cursor(cursor);
+}
+
+// Adds `rows` to the read-rows counter; a no-op when the profile has no such counter.
+void ParquetColumnReader::update_reader_read_rows(int64_t rows) const {
+    if (_profile.reader_read_rows == nullptr) {
+        return;
+    }
+    COUNTER_UPDATE(_profile.reader_read_rows, rows);
+}
+
+// Adds `rows` to the skip-rows counter; a no-op when the profile has no such counter.
+void ParquetColumnReader::update_reader_skip_rows(int64_t rows) const {
+    if (_profile.reader_skip_rows == nullptr) {
+        return;
+    }
+    COUNTER_UPDATE(_profile.reader_skip_rows, rows);
+}
+
+// Reads only the selected rows of a `batch_rows`-sized batch into `column`. The selection is
+// turned into ranges; for each range the gap before it is skipped and the range itself is
+// read, and the tail of the batch is skipped at the end so the reader has consumed exactly
+// `batch_rows` rows. Fails with InvalidArgument for a null column or a range that overlaps
+// the previous one or exceeds the batch, and with Corruption when a read returns fewer or
+// more rows than the range length.
+Status ParquetColumnReader::select(const SelectionVector& sel, uint16_t selected_rows,
+                                   int64_t batch_rows, MutableColumnPtr& column) {
+    if (column.get() == nullptr) {
+        return Status::InvalidArgument("Parquet selected read result is null for column {}",
+                                       name());
+    }
+    RETURN_IF_ERROR(sel.verify(selected_rows, batch_rows));
+
+    // Number of batch rows already consumed (read or skipped).
+    int64_t consumed = 0;
+    const auto spans = selection_to_ranges(sel, selected_rows);
+    for (const auto& span : spans) {
+        const auto span_end = span.start + span.length;
+        const bool starts_after_consumed = span.start >= consumed;
+        const bool fits_in_batch = span_end <= batch_rows;
+        if (!starts_after_consumed || !fits_in_batch) {
+            return Status::InvalidArgument("Invalid parquet selection range [{}, {}) for column {}",
+                                           span.start, span_end, name());
+        }
+        // Drop the unselected rows between the previous range and this one.
+        RETURN_IF_ERROR(skip(span.start - consumed));
+
+        int64_t rows_read = 0;
+        RETURN_IF_ERROR(read(span.length, column, &rows_read));
+        if (rows_read != span.length) {
+            return Status::Corruption(
+                    "Parquet selected read returned {} rows, expected {} rows for column {}",
+                    rows_read, span.length, name());
+        }
+        consumed = span_end;
+    }
+    // Drop whatever is left of the batch after the last selected range.
+    RETURN_IF_ERROR(skip(batch_rows - consumed));
+    if (_profile.reader_select_rows != nullptr) {
+        COUNTER_UPDATE(_profile.reader_select_rows, selected_rows);
+    }
+    return Status::OK();
+}
+
+// Builds a factory for one row group. `_record_readers` gets one (initially null) slot per
+// leaf column. A non-positive `num_leaf_columns` produces an empty slot table instead of being
+// cast to a huge size_t, so later leaf-id bounds checks reject every column with a Status
+// rather than the constructor failing on an oversized allocation.
+ParquetColumnReaderFactory::ParquetColumnReaderFactory(
+        std::shared_ptr<::parquet::RowGroupReader> row_group, int num_leaf_columns,
+        const std::map<int, ParquetPageSkipPlan>* page_skip_plans,
+        ParquetPageSkipProfile page_skip_profile, const cctz::time_zone* timezone,
+        bool enable_strict_mode, ParquetColumnReaderProfile column_reader_profile)
+        : _row_group(std::move(row_group)),
+          _record_readers(num_leaf_columns > 0 ? static_cast<size_t>(num_leaf_columns) : 0),
+          _page_skip_plans(page_skip_plans),
+          _page_skip_profile(page_skip_profile),
+          _timezone(timezone),
+          _enable_strict_mode(enable_strict_mode),
+          _column_reader_profile(column_reader_profile) {}
+
+// Creates a RowPositionColumnReader seeded with the first row of the row group and the
+// factory's column reader profile.
+std::unique_ptr<ParquetColumnReader> ParquetColumnReaderFactory::create_row_position_column_reader(
+        int64_t row_group_first_row) const {
+    std::unique_ptr<ParquetColumnReader> reader =
+            std::make_unique<RowPositionColumnReader>(row_group_first_row, _column_reader_profile);
+    return reader;
+}
+
+// Creates a GlobalRowIdColumnReader from the row-id context, the first row of the row group
+// and the factory's column reader profile.
+std::unique_ptr<ParquetColumnReader> ParquetColumnReaderFactory::create_global_rowid_column_reader(
+        const format::GlobalRowIdContext& context, int64_t row_group_first_row) const {
+    std::unique_ptr<ParquetColumnReader> reader = std::make_unique<GlobalRowIdColumnReader>(
+            context, row_group_first_row, _column_reader_profile);
+    return reader;
+}
+
+// Wraps an existing record reader in a ScalarColumnReader and stores it in `*reader`.
+// The column's page skip plan is passed along only when `use_page_skip_plan` is set;
+// otherwise the scalar reader receives a null plan. Fails when `reader` is null.
+Status ParquetColumnReaderFactory::make_scalar_column_reader(
+        const ParquetColumnSchema& column_schema,
+        std::shared_ptr<::parquet::internal::RecordReader> record_reader, bool use_page_skip_plan,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    const ParquetPageSkipPlan* skip_plan = nullptr;
+    if (use_page_skip_plan) {
+        skip_plan = find_page_skip_plan(_page_skip_plans, column_schema.leaf_column_id);
+    }
+    *reader = std::make_unique<ScalarColumnReader>(column_schema, std::move(record_reader),
+                                                   skip_plan, _timezone, _enable_strict_mode,
+                                                   _column_reader_profile);
+    return Status::OK();
+}
+
+// Validates `column_schema` and creates a scalar reader for it in `*reader`.
+// Checks run in this order: null output pointer, unsupported annotation, nested readers
+// requiring a primitive column, leaf id range, null descriptor, and finally the flat or nested
+// capability checks. Page filtering and the page skip plan are used for flat columns only.
+Status ParquetColumnReaderFactory::create_scalar_column_reader(
+        const ParquetColumnSchema& column_schema, bool is_nested,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    const auto& type_descriptor = column_schema.type_descriptor;
+    if (!type_descriptor.unsupported_reason.empty()) {
+        return Status::NotSupported("Unsupported parquet column '{}': {}", column_schema.name,
+                                    type_descriptor.unsupported_reason);
+    }
+    if (is_nested && column_schema.kind != ParquetColumnSchemaKind::PRIMITIVE) {
+        return Status::InvalidArgument("Parquet nested scalar reader requires primitive column {}",
+                                       column_schema.name);
+    }
+    const int leaf_id = column_schema.leaf_column_id;
+    const bool leaf_id_in_range =
+            leaf_id >= 0 && leaf_id < static_cast<int>(_record_readers.size());
+    if (!leaf_id_in_range) {
+        return Status::InvalidArgument("Invalid parquet leaf column id {} for column {}",
+                                       column_schema.leaf_column_id, column_schema.name);
+    }
+    if (column_schema.descriptor == nullptr) {
+        return Status::InvalidArgument("Parquet column descriptor is null for column {}",
+                                       column_schema.name);
+    }
+
+    if (is_nested) {
+        if (!supports_nested_scalar_record_reader(column_schema)) {
+            return Status::NotSupported(
+                    "Current parquet nested scalar reader does not support column {}",
+                    column_schema.name);
+        }
+    } else {
+        // A flat column must not repeat and may have at most one definition level.
+        const bool is_flat_primitive = column_schema.descriptor->max_repetition_level() == 0 &&
+                                       column_schema.descriptor->max_definition_level() <= 1;
+        if (!is_flat_primitive) {
+            return Status::NotSupported(
+                    "Current parquet scalar reader only supports flat primitive columns; column {} is "
+                    "not supported",
+                    column_schema.name);
+        }
+        if (!type_descriptor.supports_record_reader) {
+            return Status::NotSupported("Current parquet scalar reader does not support column {}",
+                                        column_schema.name);
+        }
+    }
+
+    // Nested readers implement skip() by materializing rows into a scratch column. If Arrow
+    // page filtering is also installed, those scratch reads can consume the next selected row
+    // after a page-index range gap. Keep page filtering on flat scalar readers only.
+    const bool use_page_filtering = !is_nested;
+    std::shared_ptr<::parquet::internal::RecordReader> record_reader;
+    RETURN_IF_ERROR(get_record_reader(column_schema.leaf_column_id, column_schema.descriptor,
+                                      column_schema.name, use_page_filtering, &record_reader));
+    return make_scalar_column_reader(column_schema, std::move(record_reader), use_page_filtering,
+                                     reader);
+}
+
+// Returns the RecordReader cached for leaf_column_id, creating and caching it on first use.
+Status ParquetColumnReaderFactory::get_record_reader(
+        int leaf_column_id, const ::parquet::ColumnDescriptor* descriptor, const std::string& name,
+        bool install_page_filter,
+        std::shared_ptr<::parquet::internal::RecordReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    if (_row_group == nullptr) {
+        return Status::InternalError("Parquet row group reader is not initialized for column {}",
+                                     name);
+    }
+    if (leaf_column_id < 0 || leaf_column_id >= static_cast<int>(_record_readers.size())) {
+        return Status::InvalidArgument("Invalid parquet leaf column id {} for column {}",
+                                       leaf_column_id, name);
+    }
+    if (descriptor == nullptr) {
+        return Status::InvalidArgument("Parquet column descriptor is null for column {}", name);
+    }
+    if (_record_readers[leaf_column_id] == nullptr) {
+        try {
+            auto page_reader = _row_group->GetColumnPageReader(leaf_column_id);
+            if (install_page_filter) { // ignored on cache hits: the reader is reused as built
+                install_data_page_filter(page_reader, _page_skip_plans, leaf_column_id,
+                                         _page_skip_profile);
+            }
+            const auto level_info = ::parquet::internal::LevelInfo::ComputeLevelInfo(descriptor);
+            auto new_reader = ::parquet::internal::RecordReader::Make(
+                    descriptor, level_info, ::arrow::default_memory_pool(),
+                    /*read_dictionary=*/false, /*read_dense_for_nullable=*/false);
+            new_reader->SetPageReader(std::move(page_reader));
+            _record_readers[leaf_column_id] = std::move(new_reader); // cache only once wired
+        } catch (const ::parquet::ParquetException& e) {
+            return Status::Corruption("Failed to create parquet record reader for column {}: {}",
+                                      name, e.what());
+        } catch (const std::exception& e) {
+            return Status::InternalError("Failed to create parquet record reader for column {}: {}",
+                                         name, e.what());
+        }
+    }
+    if (_record_readers[leaf_column_id] == nullptr) {
+        return Status::Corruption("Failed to create parquet record reader for column {}", name);
+    }
+    *reader = _record_readers[leaf_column_id];
+    return Status::OK();
+}
+
+// Builds a STRUCT reader from its projected children; unprojected children are skipped.
+Status ParquetColumnReaderFactory::create_struct_column_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    std::vector<std::unique_ptr<ParquetColumnReader>> field_readers;
+    std::vector<int> field_output_indices;
+    field_readers.reserve(column_schema.children.size());
+    field_output_indices.reserve(column_schema.children.size());
+    DataTypes field_types;
+    Strings field_names;
+    for (const auto& field_schema : column_schema.children) {
+        const auto* field_projection =
+                format::find_child_projection(projection, field_schema->local_id);
+        if (!format::is_child_projected(projection, field_schema->local_id)) {
+            continue;
+        }
+        std::unique_ptr<ParquetColumnReader> field_reader;
+        RETURN_IF_ERROR(create_column_reader(*field_schema, field_projection, true, &field_reader));
+        field_output_indices.push_back(static_cast<int>(field_types.size()));
+        field_types.push_back(make_nullable(field_reader->type()));
+        field_names.push_back(field_reader->name());
+        field_readers.push_back(std::move(field_reader));
+    }
+    if (format::is_partial_projection(projection) &&
+        field_types.size() != projection->children.size()) {
+        return Status::InvalidArgument(
+                "Parquet STRUCT projection for column {} contains invalid child",
+                column_schema.name);
+    }
+    if (field_types.empty() && !column_schema.children.empty()) {
+        return Status::NotSupported("Parquet STRUCT projection for column {} contains no children",
+                                    column_schema.name);
+    }
+    DataTypePtr struct_type = column_schema.type;
+    if (format::is_partial_projection(projection)) {
+        struct_type = std::make_shared<DataTypeStruct>(field_types, field_names);
+        if (column_schema.type != nullptr && column_schema.type->is_nullable()) {
+            struct_type = make_nullable(struct_type);
+        }
+    }
+    *reader = std::make_unique<StructColumnReader>(
+            column_schema, std::move(struct_type), std::move(field_readers),
+            std::move(field_output_indices), _column_reader_profile);
+    return Status::OK();
+}
+
+// Builds a LIST reader that wraps the reader of the single element child.
+Status ParquetColumnReaderFactory::create_list_column_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    if (column_schema.children.size() != 1) {
+        return Status::NotSupported("Unsupported parquet LIST layout for column {}",
+                                    column_schema.name);
+    }
+    const auto& item_schema = *column_schema.children[0];
+    const auto* item_projection = format::find_child_projection(projection, item_schema.local_id);
+    if (format::is_partial_projection(projection) && item_projection == nullptr) {
+        return Status::NotSupported("Parquet LIST projection for column {} contains no element",
+                                    column_schema.name);
+    }
+    std::unique_ptr<ParquetColumnReader> item_reader;
+    RETURN_IF_ERROR(create_column_reader(item_schema, item_projection, true, &item_reader));
+    // Rebuild the ARRAY type around the element reader's type when the element is pruned.
+    DataTypePtr list_type = column_schema.type;
+    if (format::is_partial_projection(item_projection)) {
+        list_type = std::make_shared<DataTypeArray>(item_reader->type());
+        if (column_schema.type != nullptr && column_schema.type->is_nullable()) {
+            list_type = make_nullable(list_type);
+        }
+    }
+    *reader = std::make_unique<ListColumnReader>(column_schema, std::move(list_type),
+                                                 std::move(item_reader), _column_reader_profile);
+    return Status::OK();
+}
+
+// Builds a MAP reader; projection may prune only the value subtree, the key is read in full.
+Status ParquetColumnReaderFactory::create_map_column_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    if (column_schema.children.size() != 2) {
+        return Status::NotSupported("Unsupported parquet MAP layout for column {}",
+                                    column_schema.name);
+    }
+    const auto& key_node = *column_schema.children[0];
+    const auto& value_node = *column_schema.children[1];
+    const auto* value_pruning = format::find_child_projection(projection, value_node.local_id);
+    if (format::is_partial_projection(projection)) {
+        if (value_pruning == nullptr) {
+            return Status::NotSupported("Parquet MAP projection for column {} contains no value",
+                                        column_schema.name);
+        }
+        // Every projected child must be either the key or the value of this MAP.
+        for (const auto& child_projection : projection->children) {
+            const auto child_id = child_projection.local_id();
+            if (child_id != key_node.local_id && child_id != value_node.local_id) {
+                return Status::InvalidArgument(
+                        "Parquet MAP projection for column {} contains invalid child",
+                        column_schema.name);
+            }
+        }
+    }
+    // The key stream is always read in full: it owns entry existence, offsets and key equality
+    // semantics, which is why MAP projection is defined only as value-subtree pruning.
+    std::unique_ptr<ParquetColumnReader> key_reader;
+    RETURN_IF_ERROR(create_column_reader(key_node, nullptr, true, &key_reader));
+    std::unique_ptr<ParquetColumnReader> value_reader;
+    RETURN_IF_ERROR(create_column_reader(value_node, value_pruning, true, &value_reader));
+    DataTypePtr map_type = column_schema.type;
+    if (format::is_partial_projection(value_pruning)) {
+        map_type = std::make_shared<DataTypeMap>(make_nullable(key_reader->type()),
+                                                 make_nullable(value_reader->type()));
+        if (column_schema.type != nullptr && column_schema.type->is_nullable()) {
+            map_type = make_nullable(map_type);
+        }
+    }
+    *reader = std::make_unique<MapColumnReader>(column_schema, std::move(map_type),
+                                                std::move(key_reader), std::move(value_reader),
+                                                _column_reader_profile);
+    return Status::OK();
+}
+
+Status ParquetColumnReaderFactory::create(const ParquetColumnSchema& column_schema,
+                                          const format::LocalColumnIndex* projection,
+                                          std::unique_ptr<ParquetColumnReader>* reader) const {
+    return create_column_reader(column_schema, projection, /*is_nested=*/false, reader);
+}
+
+Status ParquetColumnReaderFactory::create_count_shape_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        std::unique_ptr<ParquetColumnReader>* reader) const {
+    return create_count_shape_reader_impl(column_schema, projection, /*is_nested=*/false, reader);
+}
+
+// Follows one representative child per nested level down to a leaf and builds its scalar reader.
+Status ParquetColumnReaderFactory::create_count_shape_reader_impl(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        bool is_nested, std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    switch (column_schema.kind) {
+    case ParquetColumnSchemaKind::PRIMITIVE:
+        if (format::is_partial_projection(projection)) {
+            return Status::InvalidArgument("Parquet COUNT projection is invalid for column {}",
+                                           column_schema.name);
+        }
+        return create_scalar_column_reader(column_schema, is_nested, reader);
+    case ParquetColumnSchemaKind::STRUCT: {
+        if (column_schema.children.empty()) {
+            return Status::NotSupported("Parquet COUNT shape reader found empty STRUCT column {}",
+                                        column_schema.name);
+        }
+        const ParquetColumnSchema* child_schema = column_schema.children[0].get();
+        const format::LocalColumnIndex* child_projection = nullptr;
+        if (format::is_partial_projection(projection)) {
+            if (projection->children.empty()) { // children[0] below needs a non-empty projection
+                return Status::InvalidArgument("Parquet COUNT projection is empty for column {}",
+                                               column_schema.name);
+            }
+            child_projection = &projection->children[0];
+            const auto wanted = child_projection->local_id();
+            const auto child_it = std::ranges::find_if(
+                    column_schema.children, [&](const auto& c) { return c->local_id == wanted; });
+            if (child_it == column_schema.children.end()) {
+                return Status::InvalidArgument(
+                        "Parquet COUNT projection for column {} contains invalid child",
+                        column_schema.name);
+            }
+            child_schema = child_it->get();
+        }
+        DORIS_CHECK(child_schema != nullptr);
+        return create_count_shape_reader_impl(*child_schema, child_projection, true, reader);
+    }
+    case ParquetColumnSchemaKind::LIST: {
+        if (column_schema.children.size() != 1) {
+            return Status::NotSupported("Unsupported parquet LIST layout for COUNT column {}",
+                                        column_schema.name);
+        }
+        const auto& elem_schema = *column_schema.children[0];
+        const auto* elem_proj = format::find_child_projection(projection, elem_schema.local_id);
+        return create_count_shape_reader_impl(elem_schema, elem_proj, true, reader);
+    }
+    case ParquetColumnSchemaKind::MAP: {
+        if (column_schema.children.empty()) {
+            return Status::NotSupported("Unsupported parquet MAP layout for COUNT column {}",
+                                        column_schema.name);
+        }
+        // The key stream defines MAP entry existence and offsets, so it alone carries the shape;
+        // skipping the value reader avoids the expensive path for files with huge MAP values.
+        return create_count_shape_reader_impl(*column_schema.children[0], nullptr, true, reader);
+    }
+    }
+    return Status::NotSupported("Unsupported parquet column schema kind for COUNT column {}",
+                                column_schema.name);
+}
+
+// Dispatches on the schema kind to the matching reader builder.
+Status ParquetColumnReaderFactory::create_column_reader(
+        const ParquetColumnSchema& column_schema, const format::LocalColumnIndex* projection,
+        bool is_nested, std::unique_ptr<ParquetColumnReader>* reader) const {
+    if (reader == nullptr) {
+        return Status::InvalidArgument("reader is null");
+    }
+    switch (column_schema.kind) {
+    case ParquetColumnSchemaKind::PRIMITIVE:
+        // Only nested leaves validate the projection; a top-level leaf never inspects it.
+        if (is_nested && format::is_partial_projection(projection)) {
+            return Status::InvalidArgument("Parquet scalar projection is invalid for column {}",
+                                           column_schema.name);
+        }
+        return create_scalar_column_reader(column_schema, is_nested, reader);
+    case ParquetColumnSchemaKind::STRUCT:
+        return create_struct_column_reader(column_schema, projection, reader);
+    case ParquetColumnSchemaKind::LIST:
+        return create_list_column_reader(column_schema, projection, reader);
+    case ParquetColumnSchemaKind::MAP:
+        return create_map_column_reader(column_schema, projection, reader);
+    }
+    // Reached only when the kind matches none of the cases above.
+    return Status::NotSupported("Unsupported parquet column schema kind for column {}",
+                                column_schema.name);
+}
+
+ParquetColumnReader::ParquetColumnReader(const ParquetColumnSchema& schema, DataTypePtr type,
+                                         ParquetColumnReaderProfile profile)
+        : _profile(profile),
+          _field_id(schema.local_id),
+          _leaf_column_id(schema.leaf_column_id),
+          _nullable_definition_level(schema.nullable_definition_level),
+          _repeated_repetition_level(schema.repeated_repetition_level),
+          _definition_level(schema.definition_level),
+          _repetition_level(schema.repetition_level),
+          _repeated_ancestor_definition_level(schema.repeated_ancestor_definition_level),
+          _type(std::move(type)), // by-value `type` is non-const here, so this is a real move
+          _name(schema.name) {}
+
+Status ParquetColumnReader::load_nested_batch(int64_t /*rows*/) {
+    return Status::NotSupported("Parquet nested batch load is not supported for column {}", _name);
+}
+
+Status ParquetColumnReader::load_nested_levels_batch(int64_t /*rows*/) {
+    return Status::NotSupported("Parquet nested levels batch load is not supported for column {}",
+                                _name);
+}
+
+Status ParquetColumnReader::build_nested_column(int64_t, MutableColumnPtr&, int64_t*) {
+    return Status::NotSupported("Parquet nested column build is not supported for column {}",
+                                _name); // base default for the virtual hook; arguments are unused
+}
+
+Status ParquetColumnReader::skip_nested_column(int64_t rows) {
+    int64_t skipped_rows = 0;
+    auto discard_column = _type->create_column(); // rows are built here, then thrown away
+    RETURN_IF_ERROR(build_nested_column(rows, discard_column, &skipped_rows));
+    if (skipped_rows == rows) {
+        return Status::OK();
+    }
+    return Status::Corruption("Failed to skip nested parquet column {}: skipped {} of {} rows",
+                              _name, skipped_rows, rows);
+}
+
+const std::vector<int16_t>& ParquetColumnReader::nested_definition_levels() const {
+    static const std::vector<int16_t> empty; // base default: no definition levels to expose
+    return empty;
+}
+
+const std::vector<int16_t>& ParquetColumnReader::nested_repetition_levels() const {
+    static const std::vector<int16_t> empty; // base default: no repetition levels to expose
+    return empty;
+}
+
+int64_t ParquetColumnReader::nested_levels_written() const {
+    return 0; // base default: reports zero levels, matching the empty level vectors
+}
+
+bool ParquetColumnReader::is_or_has_repeated_child() const {
+    return _repetition_level > 0; // base default looks only at this node's own level
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/column_reader.h b/be/src/format_v2/parquet/reader/column_reader.h
new file mode 100644
index 00000000000000..f439010e8830d7
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/column_reader.h
@@ -0,0 +1,200 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type.h"
+#include "format_v2/column_data.h"
+#include "format_v2/parquet/parquet_profile.h"
+#include "format_v2/parquet/parquet_type.h"
+#include "format_v2/parquet/selection_vector.h"
+#include "runtime/runtime_profile.h"
+
+namespace parquet {
+class ColumnDescriptor;
+class RowGroupReader;
+
+namespace internal {
+class RecordReader;
+} // namespace internal
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris {
+class IColumn;
+} // namespace doris
+
+namespace doris::format::parquet {
+struct ParquetColumnSchema;
+
+class ParquetColumnReader {
+public:
+    virtual ~ParquetColumnReader() = default;
+
+    virtual int file_column_id() const { return _field_id; }
+
+    virtual int parquet_leaf_column_id() const { return _leaf_column_id; }
+
+    int16_t nullable_definition_level() const { return _nullable_definition_level; }
+    int16_t repeated_repetition_level() const { return _repeated_repetition_level; }
+
+    virtual const DataTypePtr& type() const { return _type; }
+    virtual const std::string& name() const { return _name; }
+    const ParquetColumnReaderProfile& profile() const { return _profile; }
+
+    virtual Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) = 0;
+
+    virtual Status skip(int64_t rows);
+
+    virtual Status select(const SelectionVector& sel, uint16_t selected_rows, int64_t batch_rows,
+                          MutableColumnPtr& column);
+
+    virtual Status load_nested_batch(int64_t rows); // base default: NotSupported
+
+    // Shape-only load for COUNT(col). Afterwards only nested_definition_levels(),
+    // nested_repetition_levels() and nested_levels_written() are guaranteed to be valid;
+    // value_indices and values_column are not, so callers must not call build_nested_column().
+    // This lets the V2 aggregation path skip Doris-side value materialization even when the
+    // representative ARRAY/STRUCT leaf is STRING/BINARY. Normal scans use load_nested_batch().
+    virtual Status load_nested_levels_batch(int64_t rows); // base default: NotSupported
+
+    virtual Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                       int64_t* values_read); // base default: NotSupported
+
+    virtual Status skip_nested_column(int64_t rows); // default: builds into a scratch column
+
+    virtual const std::vector<int16_t>& nested_definition_levels() const;
+    virtual const std::vector<int16_t>& nested_repetition_levels() const;
+    virtual int64_t nested_levels_written() const;
+    virtual bool is_or_has_repeated_child() const;
+    virtual void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level);
+
+    int64_t nested_build_level_cursor() const { return _nested_build_level_cursor; }
+    void set_nested_build_level_cursor(int64_t cursor) {
+        DORIS_CHECK(cursor >= 0);
+        _nested_build_level_cursor = cursor;
+    }
+    void reset_nested_build_level_cursor() { _nested_build_level_cursor = 0; }
+
+protected:
+    ParquetColumnReader(const ParquetColumnSchema& schema, const DataTypePtr type,
+                        ParquetColumnReaderProfile profile = {});
+    ParquetColumnReader() = default;
+    void update_reader_read_rows(int64_t rows) const;
+    void update_reader_skip_rows(int64_t rows) const;
+
+    ParquetColumnReaderProfile _profile;
+    const int _field_id = -1;       // schema.local_id: child ordinal in the parent node
+    const int _leaf_column_id = -1; // Parquet physical leaf column id (-1 = non-leaf)
+    const int16_t _nullable_definition_level =
+            0; // definition-level threshold where this node becomes nullable
+    const int16_t _repeated_repetition_level =
+            0;                           // repetition level of the nearest repeated ancestor
+    const int16_t _definition_level = 0; // definition level accumulated to this node
+    const int16_t _repetition_level = 0; // repetition level accumulated to this node
+    const int16_t _repeated_ancestor_definition_level =
+            0;                              // definition level of the nearest repeated ancestor
+    const DataTypePtr _type;                // Doris target type
+    const std::string _name;                // column name for error messages
+    int64_t _nested_build_level_cursor = 0; // nested build cursor (current level position)
+};
+
+class ParquetColumnReaderFactory {
+public:
+    ParquetColumnReaderFactory(std::shared_ptr<::parquet::RowGroupReader> row_group,
+                               int num_leaf_columns,
+                               const std::map<int, ParquetPageSkipPlan>* page_skip_plans = nullptr,
+                               ParquetPageSkipProfile page_skip_profile = {},
+                               const cctz::time_zone* timezone = nullptr,
+                               bool enable_strict_mode = false,
+                               ParquetColumnReaderProfile column_reader_profile = {});
+
+    Status create(const ParquetColumnSchema& column_schema,
+                  const format::LocalColumnIndex* projection,
+                  std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    // Create a scalar reader for one representative leaf that carries the top-level column shape.
+    // This is used by COUNT(col): the caller needs definition/repetition levels to decide whether
+    // the top-level value is NULL, but must not materialize heavy payload leaves. MAP deliberately
+    // uses the key leaf because the key stream owns entry existence and avoids reading value pages.
+    Status create_count_shape_reader(const ParquetColumnSchema& column_schema,
+                                     const format::LocalColumnIndex* projection,
+                                     std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    Status create(const ParquetColumnSchema& column_schema,
+                  std::unique_ptr<ParquetColumnReader>* reader) const {
+        return create(column_schema, /*projection=*/nullptr, reader);
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_row_position_column_reader(
+            int64_t row_group_first_row) const;
+    std::unique_ptr<ParquetColumnReader> create_global_rowid_column_reader(
+            const format::GlobalRowIdContext& context, int64_t row_group_first_row) const;
+
+private:
+    Status create_scalar_column_reader(const ParquetColumnSchema& column_schema, bool is_nested,
+                                       std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    Status create_struct_column_reader(const ParquetColumnSchema& column_schema,
+                                       const format::LocalColumnIndex* projection,
+                                       std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    Status create_list_column_reader(const ParquetColumnSchema& column_schema,
+                                     const format::LocalColumnIndex* projection,
+                                     std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    Status create_map_column_reader(const ParquetColumnSchema& column_schema,
+                                    const format::LocalColumnIndex* projection,
+                                    std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    Status create_column_reader(const ParquetColumnSchema& column_schema,
+                                const format::LocalColumnIndex* projection, bool is_nested,
+                                std::unique_ptr<ParquetColumnReader>* reader) const;
+    Status create_count_shape_reader_impl(const ParquetColumnSchema& column_schema,
+                                          const format::LocalColumnIndex* projection,
+                                          bool is_nested,
+                                          std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    Status get_record_reader(int leaf_column_id, const ::parquet::ColumnDescriptor* descriptor,
+                             const std::string& name, bool install_page_filter,
+                             std::shared_ptr<::parquet::internal::RecordReader>* reader) const;
+
+    Status make_scalar_column_reader(
+            const ParquetColumnSchema& column_schema,
+            std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+            bool use_page_skip_plan, std::unique_ptr<ParquetColumnReader>* reader) const;
+
+    std::shared_ptr<::parquet::RowGroupReader> _row_group; // Arrow RowGroup reader
+    mutable std::vector<std::shared_ptr<::parquet::internal::RecordReader>>
+            _record_readers; // lazily filled RecordReader cache, indexed by leaf_column_id
+    const std::map<int, ParquetPageSkipPlan>* _page_skip_plans =
+            nullptr;                                   // page-index pruning result (not owned)
+    ParquetPageSkipProfile _page_skip_profile;         // page skip profile
+    const cctz::time_zone* _timezone = nullptr;        // timezone (not owned)
+    bool _enable_strict_mode = false;                  // strict mode
+    ParquetColumnReaderProfile _column_reader_profile; // column reader profile
+};
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp b/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp
new file mode 100644
index 00000000000000..82b2838ba2cbfe
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/global_rowid_column_reader.cpp
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/global_rowid_column_reader.h"
+
+#include <memory>
+
+#include "common/cast_set.h"
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/column/column_string.h"
+#include "core/data_type/data_type_string.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "storage/utils.h"
+
+namespace doris::format::parquet {
+
+GlobalRowIdColumnReader::GlobalRowIdColumnReader(format::GlobalRowIdContext context,
+                                                 int64_t row_group_first_row,
+                                                 ParquetColumnReaderProfile profile)
+        : ParquetColumnReader(ParquetColumnSchema {.name = BeConsts::GLOBAL_ROWID_COL},
+                              std::make_shared<DataTypeString>(), profile),
+          _context(context),
+          _row_group_first_row(row_group_first_row) {} // emission starts at row position 0
+
+int GlobalRowIdColumnReader::file_column_id() const {
+    return format::GLOBAL_ROWID_COLUMN_ID; // fixed id constant, not the base-class _field_id
+}
+
+int GlobalRowIdColumnReader::parquet_leaf_column_id() const {
+    return -1; // -1 marks a non-leaf in the base class; no Parquet leaf backs this column
+}
+
+const DataTypePtr& GlobalRowIdColumnReader::type() const {
+    return _type; // same result as the base-class implementation
+}
+
+const std::string& GlobalRowIdColumnReader::name() const {
+    return _name; // same result as the base-class implementation
+}
+
+Status GlobalRowIdColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    if (column.get() == nullptr || rows_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet global rowid read result pointer");
+    }
+    if (rows < 0) { // rows == 0 is accepted and appends nothing
+        return Status::InvalidArgument("Invalid parquet global rowid read rows {}", rows);
+    }
+    for (int64_t row = 0; row < rows; ++row) { // each file row index is narrowed to uint32_t
+        append_row_id(cast_set<uint32_t>(_row_group_first_row + _next_row_position + row), column);
+    }
+    _next_row_position += rows; // cursor is relative to the row group's first row
+    *rows_read = rows;
+    return Status::OK();
+}
+
+// Advances the row cursor without emitting values; non-positive counts are ignored.
+Status GlobalRowIdColumnReader::skip(int64_t rows) {
+    if (rows > 0) {
+        _next_row_position += rows;
+    }
+    return Status::OK();
+}
+
+// Appends one row id to `column` as the raw bytes of a GlobalRowLoacationV2 value.
+void GlobalRowIdColumnReader::append_row_id(uint32_t row_id, MutableColumnPtr& column) const {
+    GlobalRowLoacationV2 location(_context.version, _context.backend_id, _context.file_id, row_id);
+    auto* dest = assert_cast<ColumnString*>(column.get());
+    dest->insert_data(reinterpret_cast<const char*>(&location), sizeof(location));
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/global_rowid_column_reader.h b/be/src/format_v2/parquet/reader/global_rowid_column_reader.h
new file mode 100644
index 00000000000000..b3f71645923010
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/global_rowid_column_reader.h
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "format_v2/column_data.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+class GlobalRowIdColumnReader final : public ParquetColumnReader {
+public:
+    GlobalRowIdColumnReader(format::GlobalRowIdContext context, int64_t row_group_first_row,
+                            ParquetColumnReaderProfile profile = {});
+
+    int file_column_id() const override;
+    int parquet_leaf_column_id() const override;
+    const DataTypePtr& type() const override;
+    const std::string& name() const override;
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override; // only advances the position; nothing is decoded
+
+private:
+    void append_row_id(uint32_t row_id, MutableColumnPtr& column) const; // appends one string cell
+
+    format::GlobalRowIdContext _context; // RowId prefix (version + backend_id + file_id)
+    int64_t _row_group_first_row = 0;    // first file row of the current row group
+    int64_t _next_row_position = 0;      // offset of the next row id; moved by read() and skip()
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/list_column_reader.cpp b/be/src/format_v2/parquet/reader/list_column_reader.cpp
new file mode 100644
index 00000000000000..aaf8f6635f1af0
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/list_column_reader.cpp
@@ -0,0 +1,203 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/list_column_reader.h"
+
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_nullable.h"
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+
+namespace doris::format::parquet {
+namespace {
+
+void remove_nullable_wrapper_if_not_expected(const DataTypePtr& output_type,
+                                             MutableColumnPtr* column) {
+    // Keep the wrapper when the output type is nullable; otherwise unwrap to the nested column.
+    DORIS_CHECK(column != nullptr);
+    if (!output_type->is_nullable()) {
+        if (auto* wrapped = check_and_get_column<ColumnNullable>(**column)) {
+            *column = wrapped->get_nested_column_ptr();
+        }
+    }
+}
+
+} // namespace
+
+Status ListColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    RETURN_IF_ERROR(load_nested_batch(rows)); // load first: this also resets the level cursor
+    return build_nested_column(rows, column, rows_read);
+}
+
+Status ListColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    auto scratch_column = _type->create_column(); // throwaway target for the skipped rows
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    int64_t rows_read = 0;
+    RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read));
+    if (rows_read != rows) { // fewer rows built than requested is reported as corruption
+        return Status::Corruption("Failed to skip parquet LIST column {}: skipped {} of {} rows",
+                                  _name, rows_read, rows);
+    }
+    update_reader_skip_rows(rows);
+    return Status::OK();
+}
+
+Status ListColumnReader::load_nested_batch(int64_t rows) {
+    DORIS_CHECK(_element_reader != nullptr);
+    reset_nested_build_level_cursor(); // this reader's build cursor restarts with each batch
+    return _element_reader->load_nested_batch(rows);
+}
+
+Status ListColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                             int64_t* values_read) { // out: LIST values built
+    if (column.get() == nullptr || values_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet list build result pointer for column {}",
+                                       _name);
+    }
+    DORIS_CHECK(_element_reader != nullptr);
+    auto* array_column = array_column_from_output(column); // looks through a nullable wrapper
+    DORIS_CHECK(array_column != nullptr);
+    auto* parent_null_map = null_map_from_nullable_output(column); // nullptr if not nullable
+    auto nested_column = array_column->get_data_ptr()->assert_mutable();
+    const auto& element_output_type =
+            assert_cast<const DataTypeArray&>(*remove_nullable(_type)).get_nested_type();
+    remove_nullable_wrapper_if_not_expected(element_output_type, &nested_column);
+
+    const auto& def_levels = _element_reader->nested_definition_levels();
+    const auto& rep_levels = _element_reader->nested_repetition_levels();
+    const int64_t levels_written = _element_reader->nested_levels_written();
+    std::vector<uint64_t> entry_counts; // element count of every LIST value built by this call
+    NullMap parent_nulls;
+    *values_read = 0;
+    int64_t level_idx = nested_build_level_cursor(); // resume where the previous build stopped
+    const int16_t min_parent_definition_level =
+            static_cast<int16_t>(_definition_level - 1 - (_type->is_nullable() ? 1 : 0));
+    while (level_idx < levels_written) {
+        const int16_t def_level = def_levels[level_idx];
+        const int16_t rep_level = rep_levels[level_idx];
+        const bool starts_parent = rep_level < _repetition_level; // begins a new LIST value
+        if (starts_parent && *values_read >= length_upper_bound) {
+            break; // enough values built; this level stays unconsumed for the next call
+        }
+        ++level_idx;
+        if (rep_level > _repetition_level || def_level < min_parent_definition_level ||
+            (!starts_parent && def_level < _repeated_ancestor_definition_level)) {
+            continue; // consumed, but contributes neither a LIST value nor an element
+        }
+        if (rep_level == _repetition_level) { // continues the LIST value opened last
+            if (entry_counts.empty()) {
+                return Status::Corruption("Invalid repeated level for parquet LIST column {}",
+                                          _name);
+            }
+            if (def_level >= _definition_level) {
+                ++entry_counts.back();
+            }
+            continue;
+        }
+
+        const bool parent_is_null = def_level < _definition_level - 1;
+        if (parent_is_null && parent_null_map == nullptr) {
+            return Status::Corruption("Parquet LIST column {} contains null for non-nullable LIST",
+                                      _name);
+        }
+        parent_nulls.push_back(parent_is_null);
+        entry_counts.push_back(def_level >= _definition_level ? 1 : 0); // 0: empty or null LIST
+        ++*values_read;
+    }
+    set_nested_build_level_cursor(level_idx); // the next build call resumes from here
+
+    uint64_t total_entries = 0;
+    int64_t child_value_count = 0;
+    if (!_element_reader->is_or_has_repeated_child()) { // child is built with a single call
+        for (const auto entry_count : entry_counts) {
+            total_entries += entry_count;
+        }
+        RETURN_IF_ERROR(_element_reader->build_nested_column(static_cast<int64_t>(total_entries),
+                                                             nested_column, &child_value_count));
+    } else { // child is built per run of consecutive non-empty LIST values
+        uint64_t pending_entries = 0;
+        auto flush_pending_entries = [&]() -> Status {
+            if (pending_entries == 0) {
+                return Status::OK();
+            }
+            int64_t span_child_value_count = 0;
+            RETURN_IF_ERROR(_element_reader->build_nested_column(
+                    static_cast<int64_t>(pending_entries), nested_column, &span_child_value_count));
+            if (span_child_value_count != static_cast<int64_t>(pending_entries)) {
+                return Status::Corruption(
+                        "Parquet LIST column {} built {} child values, expected {}", _name,
+                        span_child_value_count, pending_entries);
+            }
+            child_value_count += span_child_value_count;
+            pending_entries = 0;
+            return Status::OK();
+        };
+
+        for (const auto entry_count : entry_counts) {
+            total_entries += entry_count;
+            if (entry_count > 0) {
+                pending_entries += entry_count;
+                continue;
+            }
+            RETURN_IF_ERROR(flush_pending_entries()); // zero-entry LIST: build the pending run
+            _element_reader->advance_nested_build_level_cursor_past_parent(_repetition_level);
+        }
+        RETURN_IF_ERROR(flush_pending_entries());
+    }
+    if (child_value_count != static_cast<int64_t>(total_entries)) {
+        return Status::Corruption("Parquet LIST column {} built {} child values, expected {}",
+                                  _name, child_value_count, total_entries);
+    }
+    array_column->get_data_ptr() = std::move(nested_column); // store the built child column back
+    append_offsets(array_column->get_offsets(), entry_counts);
+    append_parent_nulls(parent_null_map, parent_nulls);
+    return Status::OK();
+}
+
+const std::vector<int16_t>& ListColumnReader::nested_definition_levels() const {
+    DORIS_CHECK(_element_reader != nullptr);
+    return _element_reader->nested_definition_levels(); // forwarded from the element reader
+}
+
+const std::vector<int16_t>& ListColumnReader::nested_repetition_levels() const {
+    DORIS_CHECK(_element_reader != nullptr);
+    return _element_reader->nested_repetition_levels(); // forwarded from the element reader
+}
+
+int64_t ListColumnReader::nested_levels_written() const {
+    DORIS_CHECK(_element_reader != nullptr);
+    return _element_reader->nested_levels_written(); // forwarded from the element reader
+}
+
+bool ListColumnReader::is_or_has_repeated_child() const {
+    return true; // unconditional for a LIST reader
+}
+
+void ListColumnReader::advance_nested_build_level_cursor_past_parent(
+        int16_t parent_repetition_level) {
+    DORIS_CHECK(_element_reader != nullptr); // both this cursor and the element's are advanced
+    ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    _element_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/list_column_reader.h b/be/src/format_v2/parquet/reader/list_column_reader.h
new file mode 100644
index 00000000000000..5a60eecacb0e3e
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/list_column_reader.h
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+class ListColumnReader final : public ParquetColumnReader {
+public:
+    ListColumnReader(const ParquetColumnSchema& schema, DataTypePtr type,
+                     std::unique_ptr<ParquetColumnReader> element_reader,
+                     ParquetColumnReaderProfile profile = {})
+            : ParquetColumnReader(schema, std::move(type), profile),
+              _element_reader(std::move(element_reader)) {}
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override; // builds the rows into a scratch column and drops it
+    Status load_nested_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override;
+    const std::vector<int16_t>& nested_repetition_levels() const override;
+    int64_t nested_levels_written() const override;
+    bool is_or_has_repeated_child() const override; // always true for LIST
+    void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override;
+
+private:
+    std::unique_ptr<ParquetColumnReader>
+            _element_reader; // element reader (recursive; may be Scalar/Struct/List/Map)
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/map_column_reader.cpp b/be/src/format_v2/parquet/reader/map_column_reader.cpp
new file mode 100644
index 00000000000000..90d4a867331190
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/map_column_reader.cpp
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/map_column_reader.h"
+
+#include <cstdint>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+
+namespace doris::format::parquet {
+namespace {
+
+void remove_nullable_wrapper_if_not_expected(const DataTypePtr& output_type,
+                                             MutableColumnPtr* column) {
+    // Keep the wrapper when the output type is nullable; otherwise unwrap to the nested column.
+    DORIS_CHECK(column != nullptr);
+    if (!output_type->is_nullable()) {
+        if (auto* wrapped = check_and_get_column<ColumnNullable>(**column)) {
+            *column = wrapped->get_nested_column_ptr();
+        }
+    }
+}
+
+} // namespace
+
+Status MapColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    RETURN_IF_ERROR(load_nested_batch(rows)); // load first: this also resets the level cursor
+    return build_nested_column(rows, column, rows_read);
+}
+
+Status MapColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    auto scratch_column = _type->create_column(); // throwaway target for the skipped rows
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    int64_t rows_read = 0;
+    RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read));
+    if (rows_read != rows) { // fewer rows built than requested is reported as corruption
+        return Status::Corruption("Failed to skip parquet MAP column {}: skipped {} of {} rows",
+                                  _name, rows_read, rows);
+    }
+    update_reader_skip_rows(rows);
+    return Status::OK();
+}
+
+Status MapColumnReader::load_nested_batch(int64_t rows) {
+    DORIS_CHECK(_key_reader != nullptr);
+    DORIS_CHECK(_value_reader != nullptr);
+    reset_nested_build_level_cursor(); // this reader's build cursor restarts with each batch
+    RETURN_IF_ERROR(_key_reader->load_nested_batch(rows));
+    return _value_reader->load_nested_batch(rows); // same row count as requested for the keys
+}
+
+Status MapColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                            int64_t* values_read) { // out: MAP values built
+    if (column.get() == nullptr || values_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet map build result pointer for column {}",
+                                       _name);
+    }
+    DORIS_CHECK(_key_reader != nullptr);
+    DORIS_CHECK(_value_reader != nullptr);
+    auto* map_column = map_column_from_output(column); // looks through a nullable wrapper
+    DORIS_CHECK(map_column != nullptr);
+    auto* parent_null_map = null_map_from_nullable_output(column); // nullptr if not nullable
+    auto key_column = map_column->get_keys_ptr()->assert_mutable();
+    auto value_column = map_column->get_values_ptr()->assert_mutable();
+    const auto& map_output_type = assert_cast<const DataTypeMap&>(*remove_nullable(_type));
+    remove_nullable_wrapper_if_not_expected(map_output_type.get_key_type(), &key_column);
+    remove_nullable_wrapper_if_not_expected(map_output_type.get_value_type(), &value_column);
+
+    const auto& def_levels = _key_reader->nested_definition_levels();
+    const auto& rep_levels = _key_reader->nested_repetition_levels();
+    const int64_t levels_written = _key_reader->nested_levels_written();
+
+    std::vector<uint64_t> entry_counts;
+    std::vector<int64_t> map_level_indices; // key-stream level index of each kept MAP slot
+    NullMap parent_nulls;
+    *values_read = 0;
+    int64_t level_idx = nested_build_level_cursor(); // resume where the previous build stopped
+    const int16_t min_parent_definition_level =
+            static_cast<int16_t>(_definition_level - 1 - (_type->is_nullable() ? 1 : 0));
+    while (level_idx < levels_written) {
+        const int16_t def_level = def_levels[level_idx];
+        const int16_t rep_level = rep_levels[level_idx];
+        const bool starts_parent = rep_level < _repetition_level; // begins a new MAP value
+        if (starts_parent && *values_read >= length_upper_bound) {
+            break; // enough values built; this level stays unconsumed for the next call
+        }
+        const int64_t current_level_idx = level_idx;
+        ++level_idx;
+        if (rep_level > _repetition_level || def_level < min_parent_definition_level ||
+            (!starts_parent && def_level < _repeated_ancestor_definition_level)) {
+            continue; // consumed, but contributes neither a MAP value nor an entry
+        }
+        map_level_indices.push_back(current_level_idx);
+        if (rep_level == _repetition_level) { // continues the MAP value opened last
+            if (entry_counts.empty()) {
+                return Status::Corruption("Invalid repeated level for parquet MAP column {}",
+                                          _name);
+            }
+            if (def_level >= _definition_level) {
+                ++entry_counts.back();
+            }
+            continue;
+        }
+
+        const bool parent_is_null = def_level < _definition_level - 1;
+        if (parent_is_null && parent_null_map == nullptr) {
+            return Status::Corruption("Parquet MAP column {} contains null for non-nullable MAP",
+                                      _name);
+        }
+        parent_nulls.push_back(parent_is_null);
+        entry_counts.push_back(def_level >= _definition_level ? 1 : 0); // 0: empty or null MAP
+        ++*values_read;
+    }
+    set_nested_build_level_cursor(level_idx); // the next build call resumes from here
+
+    uint64_t total_entries = 0;
+    for (const auto entry_count : entry_counts) {
+        total_entries += entry_count;
+    }
+    const size_t key_start = key_column->size(); // keys built below start at this row
+    int64_t key_value_count = 0;
+    RETURN_IF_ERROR(_key_reader->build_nested_column(static_cast<int64_t>(total_entries),
+                                                     key_column, &key_value_count));
+    if (key_value_count != static_cast<int64_t>(total_entries)) {
+        return Status::Corruption("Parquet MAP column {} built {} keys, expected {}", _name,
+                                  key_value_count, total_entries);
+    }
+    if (const auto* nullable_key_column = check_and_get_column<ColumnNullable>(*key_column);
+        nullable_key_column != nullptr &&
+        nullable_key_column->has_null(key_start, nullable_key_column->size())) {
+        return Status::Corruption("Parquet MAP column {} contains null key", _name);
+    }
+    int64_t value_count = 0;
+    if (auto* scalar_value_reader = dynamic_cast<ScalarColumnReader*>(_value_reader.get())) {
+        const auto& value_def_levels = scalar_value_reader->nested_definition_levels();
+        const auto& value_rep_levels = scalar_value_reader->nested_repetition_levels();
+        const int64_t value_levels_written = scalar_value_reader->nested_levels_written();
+        int64_t value_level_idx = scalar_value_reader->nested_build_level_cursor();
+        for (const int64_t key_level_idx : map_level_indices) {
+            while (value_level_idx < value_levels_written &&
+                   (value_rep_levels[value_level_idx] > _repetition_level ||
+                    value_def_levels[value_level_idx] < min_parent_definition_level ||
+                    (value_rep_levels[value_level_idx] >= _repetition_level &&
+                     value_def_levels[value_level_idx] < _repeated_ancestor_definition_level))) {
+                ++value_level_idx;
+            }
+            if (value_level_idx >= value_levels_written) {
+                return Status::Corruption(
+                        "Parquet MAP column {} value stream ended before key stream", _name);
+            }
+            // MAP is encoded as a repeated key/value struct. The key stream owns entry existence,
+            // but the value stream still has one shape slot for every consumed MAP slot. Consume
+            // value slots in lockstep with key slots so shape-only slots from empty/null maps do
+            // not become scalar values.
+            if (value_rep_levels[value_level_idx] != rep_levels[key_level_idx]) {
+                return Status::Corruption(
+                        "Parquet MAP column {} value repetition level is not aligned with key "
+                        "stream",
+                        _name);
+            }
+            if (def_levels[key_level_idx] >= _definition_level) { // key slot is a real entry
+                RETURN_IF_ERROR(
+                        scalar_value_reader->append_nested_value(value_level_idx, value_column));
+                ++value_count;
+            }
+            ++value_level_idx;
+        }
+        scalar_value_reader->set_nested_build_level_cursor(value_level_idx);
+    } else {
+        // Complex MAP values own their nested shape below the entry slot, so they can recursively
+        // materialize exactly one child value for each MAP entry.
+        RETURN_IF_ERROR(_value_reader->build_nested_column(static_cast<int64_t>(total_entries),
+                                                           value_column, &value_count));
+    }
+    if (value_count != static_cast<int64_t>(total_entries)) {
+        return Status::Corruption("Parquet MAP column {} built {} values, expected {}", _name,
+                                  value_count, total_entries);
+    }
+
+    map_column->get_keys_ptr() = std::move(key_column); // store the built key column back
+    map_column->get_values_ptr() = std::move(value_column);
+    append_offsets(map_column->get_offsets(), entry_counts);
+    append_parent_nulls(parent_null_map, parent_nulls);
+    return Status::OK();
+}
+
+const std::vector<int16_t>& MapColumnReader::nested_definition_levels() const {
+    DORIS_CHECK(_key_reader != nullptr);
+    return _key_reader->nested_definition_levels(); // forwarded from the key reader
+}
+
+const std::vector<int16_t>& MapColumnReader::nested_repetition_levels() const {
+    DORIS_CHECK(_key_reader != nullptr);
+    return _key_reader->nested_repetition_levels(); // forwarded from the key reader
+}
+
+int64_t MapColumnReader::nested_levels_written() const {
+    DORIS_CHECK(_key_reader != nullptr);
+    return _key_reader->nested_levels_written(); // forwarded from the key reader
+}
+
+bool MapColumnReader::is_or_has_repeated_child() const {
+    return true; // unconditional for a MAP reader
+}
+
+void MapColumnReader::advance_nested_build_level_cursor_past_parent(
+        int16_t parent_repetition_level) {
+    DORIS_CHECK(_key_reader != nullptr);
+    DORIS_CHECK(_value_reader != nullptr); // this cursor, the key's and the value's all advance
+    ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    _key_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    _value_reader->advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/map_column_reader.h b/be/src/format_v2/parquet/reader/map_column_reader.h
new file mode 100644
index 00000000000000..3e26a7a480a2a5
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/map_column_reader.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+// Reads a parquet MAP column by pairing a key reader with a value reader.
+class MapColumnReader final : public ParquetColumnReader {
+public:
+    MapColumnReader(const ParquetColumnSchema& schema, DataTypePtr type,
+                    std::unique_ptr<ParquetColumnReader> key_reader,
+                    std::unique_ptr<ParquetColumnReader> value_reader,
+                    ParquetColumnReaderProfile profile = {})
+            : ParquetColumnReader(schema, std::move(type), profile),
+              _key_reader(std::move(key_reader)),
+              _value_reader(std::move(value_reader)) {}
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override; // builds the rows into a scratch column and drops it
+    Status load_nested_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override;
+    const std::vector<int16_t>& nested_repetition_levels() const override;
+    int64_t nested_levels_written() const override;
+    bool is_or_has_repeated_child() const override; // always true for MAP
+    void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override;
+
+private:
+    std::unique_ptr<ParquetColumnReader> _key_reader; // key column reader (always read fully)
+    std::unique_ptr<ParquetColumnReader>
+            _value_reader; // value column reader (can be pruned by projection)
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/nested_column_materializer.cpp b/be/src/format_v2/parquet/reader/nested_column_materializer.cpp
new file mode 100644
index 00000000000000..e06b7eaaf317e7
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/nested_column_materializer.cpp
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+
+#include <cstdint>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+
+namespace doris::format::parquet {
+
+ColumnArray* array_column_from_output(MutableColumnPtr& column) {
+    // A nullable output wraps the array; look through the wrapper when it is present.
+    auto* wrapper = check_and_get_column<ColumnNullable>(*column);
+    auto* payload = wrapper != nullptr ? &wrapper->get_nested_column() : column.get();
+    return assert_cast<ColumnArray*>(payload);
+}
+
+ColumnMap* map_column_from_output(MutableColumnPtr& column) {
+    // A nullable output wraps the map; look through the wrapper when it is present.
+    auto* wrapper = check_and_get_column<ColumnNullable>(*column);
+    auto* payload = wrapper != nullptr ? &wrapper->get_nested_column() : column.get();
+    return assert_cast<ColumnMap*>(payload);
+}
+
+ColumnStruct* struct_column_from_output(MutableColumnPtr& column) {
+    // A nullable output wraps the struct; look through the wrapper when it is present.
+    auto* wrapper = check_and_get_column<ColumnNullable>(*column);
+    auto* payload = wrapper != nullptr ? &wrapper->get_nested_column() : column.get();
+    return assert_cast<ColumnStruct*>(payload);
+}
+
+NullMap* null_map_from_nullable_output(MutableColumnPtr& column) {
+    // Only a nullable output carries a null map; for any other output this returns nullptr,
+    // which callers treat as "output is not nullable".
+    auto* wrapper = check_and_get_column<ColumnNullable>(*column);
+    return wrapper != nullptr ? &wrapper->get_null_map_data() : nullptr;
+}
+
+void append_offsets(ColumnArray::Offsets64& offsets, const std::vector<uint64_t>& entry_counts) {
+    // Each count becomes a cumulative end offset, continuing from the last existing offset.
+    uint64_t running_end = offsets.empty() ? 0 : offsets.back();
+    offsets.reserve(offsets.size() + entry_counts.size());
+    for (const uint64_t count : entry_counts) {
+        offsets.push_back(running_end += count);
+    }
+}
+
+void append_parent_nulls(NullMap* dst, const NullMap& src) {
+    // No destination map means the output is not nullable, so there is nothing to record.
+    if (dst != nullptr) {
+        dst->insert(src.begin(), src.end());
+    }
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/nested_column_materializer.h b/be/src/format_v2/parquet/reader/nested_column_materializer.h
new file mode 100644
index 00000000000000..90fac01eb2f5e5
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/nested_column_materializer.h
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "core/column/column.h"
+#include "core/column/column_array.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_struct.h"
+
+namespace doris::format::parquet {
+
+// Helpers for nested column materialization. Each *_column_from_output function returns the
+// typed column inside `column`, unwrapping a ColumnNullable wrapper when one is present.
+
+ColumnArray* array_column_from_output(MutableColumnPtr& column);
+
+ColumnMap* map_column_from_output(MutableColumnPtr& column);
+
+ColumnStruct* struct_column_from_output(MutableColumnPtr& column);
+
+NullMap* null_map_from_nullable_output(MutableColumnPtr& column); // nullptr if not nullable
+
+// Appends one running-total offset per entry count, starting from offsets.back() (0 if empty).
+void append_offsets(ColumnArray::Offsets64& offsets, const std::vector<uint64_t>& entry_counts);
+
+void append_parent_nulls(NullMap* dst, const NullMap& src); // no-op when dst is nullptr
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp b/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp
new file mode 100644
index 00000000000000..c157ff84eef887
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/parquet_leaf_reader.cpp
@@ -0,0 +1,728 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/parquet_leaf_reader.h"
+
+#include <arrow/array/array_binary.h>
+#include <parquet/api/schema.h>
+#include <parquet/column_reader.h>
+#include <parquet/exception.h>
+
+#include <algorithm>
+#include <bit>
+#include <cmath>
+#include <cstring>
+#include <exception>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type_serde/decoded_column_view.h"
+#include "core/string_ref.h"
+#include "runtime/runtime_profile.h"
+#include "util/simd/bits.h"
+
+namespace doris::format::parquet {
+namespace {
+
+// Translates the parquet time unit; anything other than MILLIS/MICROS/NANOS becomes UNKNOWN.
+DecodedTimeUnit decoded_time_unit(ParquetTimeUnit time_unit) {
+    if (time_unit == ParquetTimeUnit::MILLIS) {
+        return DecodedTimeUnit::MILLIS;
+    }
+    if (time_unit == ParquetTimeUnit::MICROS) {
+        return DecodedTimeUnit::MICROS;
+    }
+    if (time_unit == ParquetTimeUnit::NANOS) {
+        return DecodedTimeUnit::NANOS;
+    }
+    return DecodedTimeUnit::UNKNOWN;
+}
+
+// Writes to `*value_size` the byte width of one decoded value of `value_kind`.
+// BINARY and FIXED_BINARY have no fixed width and yield InvalidArgument; a value outside the
+// enumerators handled below yields InternalError. `*value_size` is written only on success.
+Status decoded_fixed_value_size(const std::string& column_name, DecodedValueKind value_kind,
+                                size_t* value_size) {
+    // Records the width and reports success; shared by every fixed-width case.
+    auto fixed_width = [value_size](size_t width) {
+        *value_size = width;
+        return Status::OK();
+    };
+    switch (value_kind) {
+    case DecodedValueKind::BOOL:
+        return fixed_width(sizeof(bool));
+    case DecodedValueKind::INT32:
+        return fixed_width(sizeof(int32_t));
+    case DecodedValueKind::UINT32:
+        return fixed_width(sizeof(uint32_t));
+    case DecodedValueKind::INT64:
+        return fixed_width(sizeof(int64_t));
+    case DecodedValueKind::UINT64:
+        return fixed_width(sizeof(uint64_t));
+    case DecodedValueKind::INT96:
+        return fixed_width(12);
+    case DecodedValueKind::FLOAT:
+        return fixed_width(sizeof(float));
+    case DecodedValueKind::DOUBLE:
+        return fixed_width(sizeof(double));
+    case DecodedValueKind::BINARY:
+    case DecodedValueKind::FIXED_BINARY:
+        return Status::InvalidArgument("Parquet binary value kind has no fixed value size for {}",
+                                       column_name);
+    }
+    return Status::InternalError("Unknown decoded value kind for column {}", column_name);
+}
+
+// Stores GetBuilderChunks() in `*chunks`; InternalError unless the reader is a binary one.
+Status get_binary_chunks(const std::string& column_name,
+                         ::parquet::internal::RecordReader& record_reader,
+                         std::vector<std::shared_ptr<::arrow::Array>>* chunks) {
+    if (auto* reader = dynamic_cast<::parquet::internal::BinaryRecordReader*>(&record_reader)) {
+        *chunks = reader->GetBuilderChunks();
+        return Status::OK();
+    }
+    return Status::InternalError("Parquet binary record reader is not available for column {}",
+                                 column_name);
+}
+
+// Flattens the Arrow `chunks` into one StringRef per record; null slots become {nullptr, 0}.
+// With `read_dense_for_nullable` the chunks hold only non-null values, which are re-expanded
+// through `null_map`. Count mismatches yield Corruption. The refs point into the chunks.
+Status build_binary_values(const std::string& column_name,
+                           const std::vector<std::shared_ptr<::arrow::Array>>& chunks,
+                           int64_t records_read, const NullMap* null_map,
+                           bool read_dense_for_nullable, std::vector<StringRef>* binary_values) {
+    std::vector<StringRef> compact_values;
+    std::vector<StringRef>& decoded = read_dense_for_nullable ? compact_values : *binary_values;
+    decoded.reserve(records_read);
+    for (const auto& chunk : chunks) {
+        if (chunk == nullptr) {
+            return Status::Corruption(
+                    "Parquet binary record reader returned null chunk for column {}", column_name);
+        }
+        if (auto* var_array = dynamic_cast<::arrow::BinaryArray*>(chunk.get())) {
+            for (int64_t pos = 0; pos < var_array->length(); ++pos) {
+                if (var_array->IsNull(pos)) {
+                    decoded.emplace_back(static_cast<const char*>(nullptr), 0);
+                } else {
+                    int32_t byte_count = 0;
+                    const uint8_t* bytes = var_array->GetValue(pos, &byte_count);
+                    decoded.emplace_back(reinterpret_cast<const char*>(bytes), byte_count);
+                }
+            }
+        } else if (auto* fixed_array = dynamic_cast<::arrow::FixedSizeBinaryArray*>(chunk.get())) {
+            for (int64_t pos = 0; pos < fixed_array->length(); ++pos) {
+                if (fixed_array->IsNull(pos)) {
+                    decoded.emplace_back(static_cast<const char*>(nullptr), 0);
+                } else {
+                    decoded.emplace_back(reinterpret_cast<const char*>(fixed_array->GetValue(pos)),
+                                         fixed_array->byte_width());
+                }
+            }
+        } else {
+            return Status::InternalError("Unexpected Arrow binary array type for column {}",
+                                         column_name);
+        }
+    }
+    if (!read_dense_for_nullable) {
+        if (binary_values->size() != static_cast<size_t>(records_read)) {
+            return Status::Corruption(
+                    "Invalid parquet binary record read result for column {}: rows={}, records={}",
+                    column_name, binary_values->size(), records_read);
+        }
+        return Status::OK();
+    }
+    if (null_map == nullptr || null_map->size() != static_cast<size_t>(records_read)) {
+        return Status::Corruption(
+                "Invalid dense nullable parquet null map for column {}: rows={}, null_map={}",
+                column_name, records_read, null_map == nullptr ? 0 : null_map->size());
+    }
+    const int64_t non_null_count = static_cast<int64_t>(simd::count_zero_num(
+            reinterpret_cast<const int8_t*>(null_map->data()), null_map->size()));
+    if (compact_values.size() != static_cast<size_t>(non_null_count)) {
+        return Status::Corruption(
+                "Invalid dense nullable parquet binary values for column {}: values={}, "
+                "records={}, nulls={}",
+                column_name, compact_values.size(), records_read, records_read - non_null_count);
+    }
+    binary_values->reserve(records_read);
+    size_t next_value = 0;
+    for (int64_t record_idx = 0; record_idx < records_read; ++record_idx) {
+        const bool is_null = (*null_map)[record_idx] != 0;
+        binary_values->push_back(is_null ? StringRef(static_cast<const char*>(nullptr), 0)
+                                         : compact_values[next_value++]);
+    }
+    return Status::OK();
+}
+
+// Converts an IEEE 754 binary16 bit pattern (1 sign, 5 exponent, 10 fraction bits) to float.
+float half_to_float(uint16_t value) {
+    const uint32_t sign_bit = static_cast<uint32_t>(value & 0x8000U) << 16;
+    const uint32_t biased_exp = (value >> 10) & 0x1FU;
+    const uint32_t fraction = value & 0x03FFU;
+    if (biased_exp == 0x1FU) { // Inf / NaN: all-ones exponent, fraction bits carried over
+        return std::bit_cast<float>(sign_bit | 0x7F800000U | (fraction << 13));
+    }
+    if (biased_exp != 0) { // normal: re-bias the exponent from 15 to 127 (+112)
+        return std::bit_cast<float>(sign_bit | ((biased_exp + 112U) << 23) | (fraction << 13));
+    }
+    if (fraction == 0) { // signed zero
+        return std::bit_cast<float>(sign_bit);
+    }
+    const float magnitude = std::ldexp(static_cast<float>(fraction), -24); // subnormal
+    return sign_bit != 0 ? -magnitude : magnitude;
+}
+
+// Converts 2-byte FLOAT16 payloads to floats, one per row. Null placeholders ({nullptr, 0})
+// become 0; a bad declared length, value count or payload size yields Corruption.
+Status build_float16_values(const std::string& column_name,
+                            const ParquetTypeDescriptor& type_descriptor,
+                            const std::vector<StringRef>& binary_values, int64_t row_count,
+                            std::vector<float>* float_values) {
+    if (type_descriptor.fixed_length != 2) {
+        return Status::Corruption("Invalid parquet Float16 length for column {}: {}", column_name,
+                                  type_descriptor.fixed_length);
+    }
+    if (binary_values.size() != static_cast<size_t>(row_count)) {
+        return Status::Corruption(
+                "Invalid parquet Float16 value count for column {}: values={}, rows={}",
+                column_name, binary_values.size(), row_count);
+    }
+    float_values->resize(binary_values.size());
+    for (size_t row = 0; row < binary_values.size(); ++row) {
+        const StringRef& raw = binary_values[row];
+        if (raw.data == nullptr && raw.size == 0) {
+            (*float_values)[row] = 0; // null placeholder
+        } else if (raw.data == nullptr || raw.size != 2) {
+            return Status::Corruption(
+                    "Invalid parquet Float16 value for column {} at row {}: data={}, size={}",
+                    column_name, row, raw.data == nullptr ? "null" : "non-null", raw.size);
+        } else {
+            uint16_t half_bits = 0;
+            std::memcpy(&half_bits, raw.data, sizeof(half_bits));
+            (*float_values)[row] = half_to_float(half_bits);
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+// Captures what `record_reader` holds after a read into `batch`: level pointers (only for
+// level kinds the column has), level counters, the dense-nullable flag and the values.
+// Fixed-width kinds expose the reader's value buffer; binary kinds take the builder chunks,
+// and `_values_written` then becomes the summed length of those chunks.
+Status ParquetLeafReader::collect_batch(::parquet::internal::RecordReader& record_reader,
+                                        ParquetLeafBatch* batch) const {
+    DORIS_CHECK(batch != nullptr);
+    const bool has_def_levels = _descriptor->max_definition_level() > 0;
+    const bool has_rep_levels = _descriptor->max_repetition_level() > 0;
+    batch->_def_levels = has_def_levels ? record_reader.def_levels() : nullptr;
+    batch->_rep_levels = has_rep_levels ? record_reader.rep_levels() : nullptr;
+    batch->_fixed_values = nullptr;
+    batch->_binary_chunks.clear();
+    batch->_value_kind = decoded_value_kind(_type_descriptor);
+    batch->_consumed_level_count = record_reader.levels_position();
+    batch->_decoded_level_count = record_reader.levels_written();
+    batch->_read_dense_for_nullable = record_reader.read_dense_for_nullable();
+    batch->_values_written = record_reader.values_written();
+
+    if (!batch->is_binary_value()) {
+        batch->_fixed_values = record_reader.values();
+        return Status::OK();
+    }
+
+    RETURN_IF_ERROR(get_binary_chunks(_name, record_reader, &batch->_binary_chunks));
+    batch->_values_written = 0;
+    for (const auto& chunk : batch->_binary_chunks) {
+        if (chunk == nullptr) {
+            return Status::Corruption(
+                    "Parquet binary record reader returned null chunk for column {}", _name);
+        }
+        batch->_values_written += chunk->length();
+    }
+    return Status::OK();
+}
+
+// Level-only counterpart of collect_batch(): captures the level pointers (only for level
+// kinds the column has), the level counters and the dense-nullable flag.
+// The value buffer and the binary chunks are left empty, and zero values are reported as
+// written, so nothing downstream can materialize payload data from this batch.
+Status ParquetLeafReader::collect_levels_batch(::parquet::internal::RecordReader& record_reader,
+                                               ParquetLeafBatch* batch) const {
+    DORIS_CHECK(batch != nullptr);
+    const bool has_def_levels = _descriptor->max_definition_level() > 0;
+    const bool has_rep_levels = _descriptor->max_repetition_level() > 0;
+    batch->_def_levels = has_def_levels ? record_reader.def_levels() : nullptr;
+    batch->_rep_levels = has_rep_levels ? record_reader.rep_levels() : nullptr;
+    batch->_fixed_values = nullptr;
+    batch->_binary_chunks.clear();
+    batch->_value_kind = decoded_value_kind(_type_descriptor);
+    batch->_consumed_level_count = record_reader.levels_position();
+    batch->_decoded_level_count = record_reader.levels_written();
+    batch->_read_dense_for_nullable = record_reader.read_dense_for_nullable();
+
+    // values_written(), values() and BinaryRecordReader::GetBuilderChunks() are skipped on
+    // purpose. COUNT(col) only needs the top-level shape, and pulling binary chunks hands Arrow
+    // builder ownership to Doris arrays and later to ColumnString, which is exactly the
+    // OOM-prone materialization path for huge MAP/ARRAY/STRUCT string payloads.
+    batch->_values_written = 0;
+    return Status::OK();
+}
+
+// Appends `row_count` decoded values from `batch` to `column` through a DecodedColumnView.
+// The view's payload comes from one of four sources:
+//   - FLOAT16: binary chunks -> half_to_float -> float buffer (view kind becomes FLOAT)
+//   - other binary kinds: StringRefs built from the binary chunks
+//   - dense nullable fixed-width values: re-spaced to one slot per row
+//   - everything else: the batch's fixed-width buffer, used as is
+Status ParquetLeafReader::append_values(const ParquetLeafBatch& batch, int64_t row_count,
+                                        const NullMap* null_map, MutableColumnPtr& column) const {
+    std::vector<StringRef> binary_refs;
+    std::vector<uint8_t> respaced_values;
+    std::vector<float> widened_floats;
+    DecodedColumnView view;
+    view.value_kind = batch._value_kind;
+    view.row_count = row_count;
+    view.null_map = null_map == nullptr || null_map->empty() ? nullptr : null_map->data();
+    view.time_unit = decoded_time_unit(_type_descriptor.time_unit);
+    view.timestamp_is_adjusted_to_utc = _type_descriptor.timestamp_is_adjusted_to_utc;
+    view.timezone = _timezone;
+    view.logical_integer_bit_width = _type_descriptor.integer_bit_width;
+    view.logical_integer_is_signed = !_type_descriptor.is_unsigned_integer;
+    view.decimal_precision = _type_descriptor.decimal_precision;
+    view.decimal_scale = _type_descriptor.decimal_scale;
+    view.fixed_length = _type_descriptor.fixed_length;
+    view.enable_strict_mode = _enable_strict_mode;
+    const bool dense_nullable = batch._read_dense_for_nullable && view.null_map != nullptr;
+    const bool is_float16 = _type_descriptor.extra_type_info == ParquetExtraTypeInfo::FLOAT16;
+    if (is_float16 || batch.is_binary_value()) {
+        RETURN_IF_ERROR(build_binary_values(_name, batch._binary_chunks, row_count, null_map,
+                                            dense_nullable, &binary_refs));
+        if (is_float16) {
+            RETURN_IF_ERROR(build_float16_values(_name, _type_descriptor, binary_refs, row_count,
+                                                 &widened_floats));
+            view.value_kind = DecodedValueKind::FLOAT;
+            view.values = reinterpret_cast<const uint8_t*>(widened_floats.data());
+        } else {
+            view.binary_values = &binary_refs;
+        }
+    } else if (dense_nullable) {
+        RETURN_IF_ERROR(build_spaced_fixed_values(batch, row_count, null_map, &respaced_values));
+        view.values = respaced_values.data();
+    } else {
+        view.values = batch._fixed_values;
+    }
+
+    if (_decoded_value_appender != nullptr) {
+        return _decoded_value_appender(column, view);
+    }
+
+    SCOPED_TIMER(_profile.materialization_time);
+    auto* nullable_column =
+            _type->is_nullable() ? nullptr : check_and_get_column<ColumnNullable>(*column);
+    if (nullable_column == nullptr) {
+        return _type->get_serde()->read_column_from_decoded_values(*column, view);
+    }
+    // Nullable column behind a non-nullable reader type: decode, then flag new rows non-null.
+    auto& nested_column = nullable_column->get_nested_column();
+    auto& tmp_null_map = nullable_column->get_null_map_data();
+    const auto old_nested_size = nested_column.size();
+    const auto old_null_map_size = tmp_null_map.size();
+    auto st = _type->get_serde()->read_column_from_decoded_values(nested_column, view);
+    if (!st.ok()) {
+        nested_column.resize(old_nested_size);
+        return st;
+    }
+    tmp_null_map.resize(old_null_map_size + nested_column.size() - old_nested_size);
+    memset(tmp_null_map.data() + old_null_map_size, 0,
+           tmp_null_map.size() - old_null_map_size);
+    return Status::OK();
+}
+
+bool ParquetLeafBatch::is_binary_value() const { // true for the BINARY and FIXED_BINARY kinds
+    return _value_kind == DecodedValueKind::BINARY || _value_kind == DecodedValueKind::FIXED_BINARY;
+}
+
+// Expands densely packed fixed-width values to one slot per row; NULL rows get a zeroed slot.
+// Corruption unless null_map has row_count entries and values_written == non-null rows.
+Status ParquetLeafReader::build_spaced_fixed_values(const ParquetLeafBatch& batch,
+                                                    int64_t row_count, const NullMap* null_map,
+                                                    std::vector<uint8_t>* spaced_values) const {
+    DORIS_CHECK(null_map != nullptr && spaced_values != nullptr);
+    size_t value_size = 0;
+    RETURN_IF_ERROR(decoded_fixed_value_size(_name, batch._value_kind, &value_size));
+    const auto non_null_count = static_cast<int64_t>(simd::count_zero_num(
+            reinterpret_cast<const int8_t*>(null_map->data()), null_map->size()));
+    if (null_map->size() != static_cast<size_t>(row_count) ||
+        batch._values_written != non_null_count) {
+        return Status::Corruption(
+                "Invalid dense nullable parquet values for column {}: values={}, records={}, "
+                "nulls={}",
+                _name, batch._values_written, row_count, row_count - non_null_count);
+    }
+    spaced_values->assign(static_cast<size_t>(row_count) * value_size, 0); // zero every slot
+    const auto* src = batch._fixed_values;
+    auto* dst = spaced_values->data();
+    for (int64_t record_idx = 0; record_idx < row_count; ++record_idx, dst += value_size) {
+        if ((*null_map)[record_idx] == 0) {
+            std::memcpy(dst, src, value_size);
+            src += value_size;
+        }
+    }
+    return Status::OK();
+}
+
+ParquetLeafReader::ParquetLeafReader(
+        const ::parquet::ColumnDescriptor* descriptor, ParquetTypeDescriptor type_descriptor,
+        DataTypePtr type, std::string name,
+        std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+        ParquetColumnReaderProfile profile, const cctz::time_zone* timezone,
+        bool enable_strict_mode,
+        std::function<Status(MutableColumnPtr&, const DecodedColumnView&)> decoded_value_appender)
+        : _descriptor(descriptor), // stored as a raw pointer; the descriptor is not copied
+          _type_descriptor(type_descriptor),
+          _type(std::move(type)), // by-value parameters are moved into the members
+          _name(std::move(name)),
+          _record_reader(std::move(record_reader)),
+          _profile(profile),
+          _timezone(timezone), // stored as a raw pointer; the time zone is not copied
+          _enable_strict_mode(enable_strict_mode),
+          _decoded_value_appender(std::move(decoded_value_appender)) {} // optional appender
+
+// Reads up to `batch_rows` records through the Arrow record reader, stores the count in
+// `*rows_read` and captures the reader state into `batch` via collect_batch().
+Status ParquetLeafReader::read_batch(int64_t batch_rows, ParquetLeafBatch* batch,
+                                     int64_t* rows_read) const {
+    if (batch == nullptr || rows_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet leaf batch result pointer for column {}",
+                                       _name);
+    }
+    if (_record_reader == nullptr) {
+        return Status::InternalError("Parquet record reader is not initialized for column {}",
+                                     _name);
+    }
+    try {
+        _record_reader->Reset();
+        _record_reader->Reserve(batch_rows);
+        SCOPED_TIMER(_profile.arrow_read_records_time); // times only the ReadRecords() call
+        *rows_read = _record_reader->ReadRecords(batch_rows);
+    } catch (const ::parquet::ParquetException& e) {
+        return Status::Corruption("Failed to read parquet records for column {}: {}", _name,
+                                  e.what());
+    } catch (const std::exception& e) {
+        return Status::InternalError("Failed to read parquet records for column {}: {}", _name,
+                                     e.what());
+    }
+    const int64_t record_count = *rows_read;
+    if (record_count < 0 || record_count > batch_rows) {
+        return Status::Corruption("Invalid parquet record read result for column {}: {}", _name,
+                                  record_count);
+    }
+    return collect_batch(*_record_reader, batch);
+}
+
+// Flags each record in `*null_map`: 1 when its definition level is not the column maximum.
+Status ParquetLeafReader::build_null_map(const ParquetLeafBatch& batch, int64_t records_read,
+                                         NullMap* null_map) const {
+    const int16_t max_definition_level = _descriptor->max_definition_level();
+    if (max_definition_level == 0) {
+        return Status::OK(); // no definition levels: null_map is left untouched
+    }
+    const auto* __restrict src = batch.def_levels();
+    if (src == nullptr && records_read > 0) {
+        return Status::Corruption(
+                "Parquet record reader returned null definition levels for nullable column {}",
+                _name);
+    }
+    null_map->resize(records_read);
+    auto* __restrict dst = null_map->data();
+    for (int64_t row = 0; row < records_read; ++row) {
+        dst[row] = src[row] != max_definition_level;
+    }
+    return Status::OK();
+}
+
+Status ParquetLeafReader::read_nested_batch(int64_t batch_rows, int16_t value_slot_definition_level,
+                                            ParquetNestedScalarBatch* batch,
+                                            int16_t value_slot_repetition_level) const {
+    int64_t record_count = 0;
+    ParquetLeafBatch leaf; // levels and values of this read; converted to `batch` below
+    RETURN_IF_ERROR(read_batch(batch_rows, &leaf, &record_count));
+    return build_nested_batch_from_leaf_batch(leaf, record_count, value_slot_definition_level,
+                                              batch, value_slot_repetition_level);
+}
+
+// Reads up to `batch_rows` records but keeps only their definition/repetition levels:
+// the reader state is captured with collect_levels_batch(), which ignores decoded values.
+Status ParquetLeafReader::read_nested_levels_batch(int64_t batch_rows,
+                                                   ParquetNestedScalarBatch* batch) const {
+    if (batch == nullptr) {
+        return Status::InvalidArgument("Nested scalar levels batch is null for column {}", _name);
+    }
+    if (_record_reader == nullptr) {
+        return Status::InternalError("Parquet record reader is not initialized for column {}",
+                                     _name);
+    }
+
+    ParquetLeafBatch leaf_levels;
+    int64_t record_count = 0;
+    try {
+        _record_reader->Reset();
+        _record_reader->Reserve(batch_rows);
+        SCOPED_TIMER(_profile.arrow_read_records_time); // times only the ReadRecords() call
+        record_count = _record_reader->ReadRecords(batch_rows);
+    } catch (const ::parquet::ParquetException& e) {
+        return Status::Corruption("Failed to read parquet levels for column {}: {}", _name,
+                                  e.what());
+    } catch (const std::exception& e) {
+        return Status::InternalError("Failed to read parquet levels for column {}: {}", _name,
+                                     e.what());
+    }
+    if (record_count < 0 || record_count > batch_rows) {
+        return Status::Corruption("Invalid parquet level read result for column {}: {}", _name,
+                                  record_count);
+    }
+    RETURN_IF_ERROR(collect_levels_batch(*_record_reader, &leaf_levels));
+    return build_nested_levels_batch_from_leaf_batch(leaf_levels, record_count, batch);
+}
+// Turns one leaf batch into a nested scalar batch: levels, per-level value indices, values.
+Status ParquetLeafReader::build_nested_batch_from_leaf_batch(
+        const ParquetLeafBatch& leaf_batch, int64_t records_read,
+        int16_t value_slot_definition_level, ParquetNestedScalarBatch* batch,
+        int16_t value_slot_repetition_level) const {
+    if (batch == nullptr) {
+        return Status::InvalidArgument("Nested scalar batch is null for column {}", _name);
+    }
+    *batch = ParquetNestedScalarBatch(); // reset any previous contents
+    batch->value_slot_definition_level = value_slot_definition_level;
+    batch->value_slot_repetition_level = value_slot_repetition_level;
+
+    batch->records_read = records_read;
+    if (_type->is_nullable() && leaf_batch.read_dense_for_nullable()) {
+        return Status::NotSupported(
+                "Dense nullable parquet nested reader is not supported for column {}", _name);
+    }
+    batch->levels_written = leaf_batch.consumed_level_count();
+    const int64_t values_written = leaf_batch.values_written();
+    if (batch->levels_written > leaf_batch.decoded_level_count()) {
+        return Status::Corruption(
+                "Invalid nested parquet level position for column {}: position={}, levels={}",
+                _name, batch->levels_written, leaf_batch.decoded_level_count());
+    }
+    if (batch->levels_written == 0 && batch->records_read > 0 &&
+        values_written == batch->records_read && _descriptor->max_definition_level() == 0 &&
+        _descriptor->max_repetition_level() == 0) {
+        batch->levels_written = batch->records_read; // level-less column: one level per record
+    }
+    if (batch->levels_written < batch->records_read || values_written < 0 ||
+        values_written > batch->levels_written) {
+        return Status::Corruption(
+                "Invalid nested parquet read result for column {}: rows={}, levels={}, values={}",
+                _name, batch->records_read, batch->levels_written, values_written);
+    }
+    if (batch->levels_written == 0) {
+        return Status::OK(); // nothing to materialize
+    }
+    // Definition levels: copy the decoded ones, or fill with the maximum when there are none.
+    auto* def_levels = leaf_batch.def_levels();
+    if (def_levels == nullptr && _descriptor->max_definition_level() > 0) {
+        return Status::Corruption(
+                "Nested parquet reader returned null definition levels for column {}", _name);
+    }
+    batch->def_levels.resize(static_cast<size_t>(batch->levels_written));
+    if (_descriptor->max_definition_level() == 0 || def_levels == nullptr) {
+        std::fill(batch->def_levels.begin(), batch->def_levels.end(),
+                  _descriptor->max_definition_level());
+    } else {
+        std::copy(def_levels, def_levels + batch->levels_written, batch->def_levels.begin());
+    }
+    // Repetition levels: copy the decoded ones, or fill with zeros when there are none.
+    auto* rep_levels = leaf_batch.rep_levels();
+    if (rep_levels == nullptr && _descriptor->max_repetition_level() > 0) {
+        return Status::Corruption(
+                "Nested parquet reader returned null repetition levels for column {}", _name);
+    }
+    batch->rep_levels.resize(static_cast<size_t>(batch->levels_written));
+    if (_descriptor->max_repetition_level() == 0 || rep_levels == nullptr) {
+        std::fill(batch->rep_levels.begin(), batch->rep_levels.end(), 0);
+    } else {
+        std::copy(rep_levels, rep_levels + batch->levels_written, batch->rep_levels.begin());
+    }
+
+    const int16_t leaf_definition_level = _descriptor->max_definition_level();
+    // Arrow's RecordReader may emit value placeholders for null ancestors that are below the
+    // Doris materialization threshold. Those slots must still advance the payload value index;
+    // otherwise the next defined child level points at the placeholder instead of its real value.
+    auto count_value_slots = [&](int16_t slot_definition_level) {
+        int64_t slot_count = 0;
+        for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) {
+            if (batch->def_levels[level_idx] >= slot_definition_level &&
+                batch->rep_levels[level_idx] <= value_slot_repetition_level) {
+                ++slot_count;
+            }
+        }
+        return slot_count;
+    };
+
+    const int64_t value_slot_count = count_value_slots(value_slot_definition_level);
+    int16_t payload_slot_definition_level = value_slot_definition_level;
+    int64_t payload_value_slot_count = value_slot_count;
+    while (payload_slot_definition_level > 0 && payload_value_slot_count < values_written) {
+        --payload_slot_definition_level; // lower the threshold until enough slots exist
+        payload_value_slot_count = count_value_slots(payload_slot_definition_level);
+    }
+    // Count the slots at the value-slot level whose leaf value is fully defined.
+    int64_t leaf_value_count = 0;
+    for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) {
+        if (batch->def_levels[level_idx] < value_slot_definition_level ||
+            batch->rep_levels[level_idx] > value_slot_repetition_level) {
+            continue;
+        }
+        if (batch->def_levels[level_idx] == leaf_definition_level) {
+            ++leaf_value_count;
+        }
+    }
+    // Pick the layout whose expected count matches values_written (checked in this order).
+    enum class ValueLayout { LEVELS, VALUE_SLOTS, LEAF_VALUES, PAYLOAD_VALUE_SLOTS };
+    ValueLayout value_layout = ValueLayout::LEAF_VALUES;
+    if (values_written == batch->levels_written) {
+        value_layout = ValueLayout::LEVELS; // one decoded value per level
+    } else if (values_written == value_slot_count) {
+        value_layout = ValueLayout::VALUE_SLOTS; // one decoded value per value slot
+    } else if (values_written == leaf_value_count) {
+        value_layout = ValueLayout::LEAF_VALUES; // one decoded value per defined leaf
+    } else if (values_written == payload_value_slot_count) {
+        value_layout = ValueLayout::PAYLOAD_VALUE_SLOTS; // one per slot at the lowered level
+    } else {
+        return Status::Corruption(
+                "Nested parquet reader returned inconsistent value count for column {}: values={}, "
+                "levels={}, slots={}, leaf_values={}, payload_slots={}, "
+                "payload_slot_definition_level={}",
+                _name, values_written, batch->levels_written, value_slot_count, leaf_value_count,
+                payload_value_slot_count, payload_slot_definition_level);
+    }
+    // Map each level to its decoded value index; -1 and a set null flag mean no leaf value.
+    batch->value_indices.resize(static_cast<size_t>(batch->levels_written), -1);
+    NullMap value_nulls(static_cast<size_t>(values_written), 1); // NULL until matched
+    int64_t value_idx = 0;
+    const int16_t decoded_slot_definition_level = value_layout == ValueLayout::PAYLOAD_VALUE_SLOTS
+                                                          ? payload_slot_definition_level
+                                                          : value_slot_definition_level;
+    for (int64_t level_idx = 0; level_idx < batch->levels_written; ++level_idx) {
+        if (batch->def_levels[level_idx] < decoded_slot_definition_level ||
+            batch->rep_levels[level_idx] > value_slot_repetition_level) {
+            continue;
+        }
+        const bool has_leaf_value = batch->def_levels[level_idx] == leaf_definition_level;
+        int64_t decoded_value_idx = -1;
+        if (value_layout == ValueLayout::LEVELS) {
+            decoded_value_idx = level_idx;
+        } else if (value_layout == ValueLayout::VALUE_SLOTS) {
+            decoded_value_idx = value_idx++;
+        } else if (value_layout == ValueLayout::PAYLOAD_VALUE_SLOTS) {
+            decoded_value_idx = value_idx++;
+        } else {
+            if (!has_leaf_value) {
+                continue; // LEAF_VALUES stores nothing for undefined leaves
+            }
+            decoded_value_idx = value_idx++;
+        }
+        DORIS_CHECK(decoded_value_idx >= 0);
+        DORIS_CHECK(decoded_value_idx < values_written);
+        if (has_leaf_value) {
+            batch->value_indices[static_cast<size_t>(level_idx)] = decoded_value_idx;
+            value_nulls[static_cast<size_t>(decoded_value_idx)] = 0;
+        }
+    }
+    if (value_layout != ValueLayout::LEVELS && value_idx != values_written) {
+        return Status::Corruption(
+                "Nested parquet reader value cursor stopped early for column {}: values={}, "
+                "visited={}",
+                _name, values_written, value_idx);
+    }
+    // Decode the values into a column of the non-nullable type; value_nulls marks the gaps.
+    const auto value_type = remove_nullable(_type);
+    batch->values_column = value_type->create_column();
+    if (values_written > 0) {
+        ParquetLeafReader value_reader(_descriptor, _type_descriptor, value_type, _name,
+                                       _record_reader, _profile, _timezone, _enable_strict_mode);
+        RETURN_IF_ERROR(value_reader.append_values(leaf_batch, values_written, &value_nulls,
+                                                   batch->values_column));
+    }
+    return Status::OK();
+}
+
+Status ParquetLeafReader::build_nested_levels_batch_from_leaf_batch(
+        const ParquetLeafBatch& leaf_batch, int64_t records_read,
+        ParquetNestedScalarBatch* batch) const {
+    // Shape-only: fills just records_read, levels_written and the def/rep levels of *batch.
+    if (batch == nullptr) {
+        return Status::InvalidArgument("Nested scalar levels batch is null for column {}", _name);
+    }
+    *batch = ParquetNestedScalarBatch();
+    batch->records_read = records_read;
+    batch->levels_written = leaf_batch.consumed_level_count();
+    if (leaf_batch.decoded_level_count() < batch->levels_written) {
+        return Status::Corruption(
+                "Invalid nested parquet level position for column {}: position={}, levels={}",
+                _name, batch->levels_written, leaf_batch.decoded_level_count());
+    }
+    // A required flat leaf (max def/rep level 0) has no physical level buffers, so fabricate one
+    // level slot per top-level row; COUNT(col) can then run its usual shape loop.
+    if (batch->levels_written == 0 && records_read > 0 &&
+        _descriptor->max_definition_level() == 0 && _descriptor->max_repetition_level() == 0) {
+        batch->levels_written = records_read;
+    }
+    if (batch->levels_written < records_read) {
+        return Status::Corruption(
+                "Invalid nested parquet levels result for column {}: rows={}, levels={}", _name,
+                records_read, batch->levels_written);
+    }
+    if (batch->levels_written == 0) {
+        return Status::OK();
+    }
+
+    const int16_t* src_def_levels = leaf_batch.def_levels();
+    if (src_def_levels == nullptr && _descriptor->max_definition_level() > 0) {
+        return Status::Corruption(
+                "Nested parquet reader returned null definition levels for column {}", _name);
+    }
+    batch->def_levels.resize(static_cast<size_t>(batch->levels_written));
+    if (src_def_levels != nullptr && _descriptor->max_definition_level() != 0) {
+        std::copy(src_def_levels, src_def_levels + batch->levels_written, batch->def_levels.begin());
+    } else {
+        std::fill(batch->def_levels.begin(), batch->def_levels.end(),
+                  _descriptor->max_definition_level());
+    }
+
+    const int16_t* src_rep_levels = leaf_batch.rep_levels();
+    if (src_rep_levels == nullptr && _descriptor->max_repetition_level() > 0) {
+        return Status::Corruption(
+                "Nested parquet reader returned null repetition levels for column {}", _name);
+    }
+    batch->rep_levels.resize(static_cast<size_t>(batch->levels_written));
+    if (src_rep_levels != nullptr && _descriptor->max_repetition_level() != 0) {
+        std::copy(src_rep_levels, src_rep_levels + batch->levels_written, batch->rep_levels.begin());
+    } else {
+        std::fill(batch->rep_levels.begin(), batch->rep_levels.end(), 0);
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/parquet_leaf_reader.h b/be/src/format_v2/parquet/reader/parquet_leaf_reader.h
new file mode 100644
index 00000000000000..73b0a75e019dbd
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/parquet_leaf_reader.h
@@ -0,0 +1,168 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/column/column.h"
+#include "core/column/column_nullable.h"
+#include "core/data_type_serde/decoded_column_view.h"
+#include "format_v2/parquet/parquet_profile.h"
+#include "format_v2/parquet/parquet_type.h"
+
+namespace parquet {
+class ColumnDescriptor;
+
+namespace internal {
+class RecordReader;
+} // namespace internal
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace arrow {
+class Array;
+} // namespace arrow
+
+namespace doris::format::parquet {
+
+struct ParquetLeafReaderTestAccess;
+
+// Read result for a nested scalar leaf: Dremel shape (levels) kept apart from decoded values.
+// The COUNT(col) fast path fills only records_read, levels_written, def_levels and rep_levels;
+// value_indices / values_column stay empty there, so build_nested_column() must not follow it.
+struct ParquetNestedScalarBatch {
+    int64_t records_read = 0; // top-level records covered by this batch
+    int64_t levels_written = 0; // number of entries in def_levels / rep_levels
+    int16_t value_slot_definition_level = 0; // min def level that counts as a slot when building
+    int16_t value_slot_repetition_level = std::numeric_limits<int16_t>::max(); // TODO confirm use
+    std::vector<int16_t> def_levels; // one definition level per level entry
+    std::vector<int16_t> rep_levels; // one repetition level per level entry
+    std::vector<int64_t> value_indices; // per level entry: row in values_column, -1 if absent
+    MutableColumnPtr values_column; // decoded leaf values addressed by value_indices
+
+    bool empty() const { return levels_written == 0; }
+};
+
+class ParquetLeafBatch { // result of one leaf read; only friend ParquetLeafReader can fill it
+public:
+    int64_t consumed_level_count() const { return _consumed_level_count; }
+    int64_t decoded_level_count() const { return _decoded_level_count; }
+    int64_t values_written() const { return _values_written; }
+    bool read_dense_for_nullable() const { return _read_dense_for_nullable; }
+    const int16_t* def_levels() const { return _def_levels; }
+    const int16_t* rep_levels() const { return _rep_levels; }
+
+private:
+    friend class ParquetLeafReader;
+
+    bool is_binary_value() const;
+
+    DecodedValueKind _value_kind = DecodedValueKind::INT32;
+    int64_t _consumed_level_count = 0; // level entries consumed; validated <= decoded count
+    int64_t _decoded_level_count = 0; // upper bound for the consumed level count
+    int64_t _values_written = 0; // value count reported for this read
+    const int16_t* _def_levels = nullptr; // null is tolerated when the max def level is 0
+    const int16_t* _rep_levels = nullptr; // null is tolerated when the max rep level is 0
+    const uint8_t* _fixed_values = nullptr; // fixed-width value buffer (presumably non-owning)
+    bool _read_dense_for_nullable = false; // true: values_written must equal the non-null rows
+    std::vector<std::shared_ptr<::arrow::Array>> _binary_chunks; // Arrow chunks of binary values
+};
+
+// Reads one Parquet leaf column. Flat path: read_batch() -> build_null_map() + append_values();
+// nested path: read_nested_batch(), or read_nested_levels_batch() when only levels are needed.
+class ParquetLeafReader {
+public:
+    ParquetLeafReader(const ::parquet::ColumnDescriptor* descriptor,
+                      ParquetTypeDescriptor type_descriptor, DataTypePtr type, std::string name,
+                      std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+                      ParquetColumnReaderProfile profile = {},
+                      const cctz::time_zone* timezone = nullptr, bool enable_strict_mode = false,
+                      std::function<Status(MutableColumnPtr&, const DecodedColumnView&)>
+                              decoded_value_appender = nullptr);
+
+    Status read_batch(int64_t batch_rows, ParquetLeafBatch* batch, int64_t* rows_read) const;
+
+    Status build_null_map(const ParquetLeafBatch& batch, int64_t records_read,
+                          NullMap* null_map) const;
+
+    Status append_values(const ParquetLeafBatch& batch, int64_t row_count, const NullMap* null_map,
+                         MutableColumnPtr& column) const;
+
+    // Decoded values may come in a LEVELS / VALUE_SLOTS / LEAF_VALUES / PAYLOAD_VALUE_SLOTS layout.
+    Status read_nested_batch(
+            int64_t batch_rows, int16_t value_slot_definition_level,
+            ParquetNestedScalarBatch* batch,
+            int16_t value_slot_repetition_level = std::numeric_limits<int16_t>::max()) const;
+
+    // COUNT(col) shape-only read path. It still calls Arrow RecordReader::ReadRecords()
+    // to advance the Parquet cursor and obtain def/rep levels, but Doris only copies levels:
+    // - it does not call BinaryRecordReader::GetBuilderChunks()
+    // - it does not build value_indices or values_column
+    // - it does not enter DataTypeSerde::read_column_from_decoded_values()
+    // This lets COUNT(col) on MAP/ARRAY/STRUCT evaluate top-level NULL state while avoiding
+    // materializing representative leaf STRING/BINARY payloads into Doris Columns. Arrow RecordReader
+    // does not expose a public levels-only API, so ReadRecords may still perform required page decoding;
+    // this API guarantees that the V2 reader does not take ownership of or copy value payloads.
+    Status read_nested_levels_batch(int64_t batch_rows, ParquetNestedScalarBatch* batch) const;
+
+private:
+    friend struct ParquetLeafReaderTestAccess;
+
+    Status collect_batch(::parquet::internal::RecordReader& record_reader,
+                         ParquetLeafBatch* batch) const;
+
+    // Levels-only variant of collect_batch(). It snapshots only def/rep level state and does not take
+    // binary chunks or expose fixed-width value buffers. Used by the COUNT(col) aggregation fast path.
+    Status collect_levels_batch(::parquet::internal::RecordReader& record_reader,
+                                ParquetLeafBatch* batch) const;
+
+    Status build_spaced_fixed_values(const ParquetLeafBatch& batch, int64_t row_count,
+                                     const NullMap* null_map,
+                                     std::vector<uint8_t>* spaced_values) const;
+
+    Status build_nested_batch_from_leaf_batch(const ParquetLeafBatch& leaf_batch,
+                                              int64_t records_read,
+                                              int16_t value_slot_definition_level,
+                                              ParquetNestedScalarBatch* batch,
+                                              int16_t value_slot_repetition_level) const;
+    Status build_nested_levels_batch_from_leaf_batch(const ParquetLeafBatch& leaf_batch,
+                                                     int64_t records_read,
+                                                     ParquetNestedScalarBatch* batch) const;
+
+    const ::parquet::ColumnDescriptor* _descriptor =
+            nullptr; // Arrow column descriptor (physical_type, max_dl, max_rl)
+    ParquetTypeDescriptor
+            _type_descriptor; // type encoding information (decimal precision, timestamp unit, etc.)
+    DataTypePtr _type;        // Doris target type
+    std::string _name;        // column name for error messages
+    std::shared_ptr<::parquet::internal::RecordReader>
+            _record_reader;                     // Arrow physical column reader (shared ownership)
+    ParquetColumnReaderProfile _profile;        // profile counters
+    const cctz::time_zone* _timezone = nullptr; // timezone for timestamp conversion
+    bool _enable_strict_mode = false;           // strict mode for type mismatch errors
+    std::function<Status(MutableColumnPtr&, const DecodedColumnView&)> _decoded_value_appender;
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/row_position_column_reader.cpp b/be/src/format_v2/parquet/reader/row_position_column_reader.cpp
new file mode 100644
index 00000000000000..4e9a363b13c7cb
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/row_position_column_reader.cpp
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/row_position_column_reader.h"
+
+#include <memory>
+
+#include "core/assert_cast.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+
+namespace doris::format::parquet {
+
+RowPositionColumnReader::RowPositionColumnReader(int64_t row_group_first_row,
+                                                 ParquetColumnReaderProfile profile)
+        : ParquetColumnReader(ParquetColumnSchema {.name = format::ROW_POSITION_COLUMN_NAME},
+                              std::make_shared<DataTypeInt64>(), profile), // Int64 positions
+          _row_group_first_row(row_group_first_row) {} // _next_row_position keeps its default 0
+
+int RowPositionColumnReader::file_column_id() const {
+    return format::ROW_POSITION_COLUMN_ID; // id constant of the row position column
+}
+
+int RowPositionColumnReader::parquet_leaf_column_id() const {
+    return -1; // NOTE(review): presumably "no backing parquet leaf" - confirm with callers
+}
+
+const DataTypePtr& RowPositionColumnReader::type() const {
+    return _type; // the DataTypeInt64 instance handed to the base class by the constructor
+}
+
+const std::string& RowPositionColumnReader::name() const {
+    return _name; // presumably format::ROW_POSITION_COLUMN_NAME, set through the schema
+}
+
+Status RowPositionColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    // Appends `rows` consecutive row positions to `column`, then advances the position cursor.
+    if (rows_read == nullptr || column.get() == nullptr) {
+        return Status::InvalidArgument("Invalid parquet row position read result pointer");
+    }
+    if (rows < 0) {
+        return Status::InvalidArgument("Invalid parquet row position read rows {}", rows);
+    }
+    auto& positions = assert_cast<ColumnInt64*>(column.get())->get_data();
+    const auto first_new_slot = positions.size();
+    positions.resize(first_new_slot + rows);
+    for (int64_t offset = 0; offset < rows; ++offset) {
+        positions[first_new_slot + offset] = _row_group_first_row + _next_row_position + offset;
+    }
+    _next_row_position += rows;
+    *rows_read = rows;
+    return Status::OK();
+}
+
+Status RowPositionColumnReader::skip(int64_t rows) {
+    // Only the position cursor moves; non-positive counts leave it untouched.
+    if (rows > 0) {
+        _next_row_position += rows;
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/row_position_column_reader.h b/be/src/format_v2/parquet/reader/row_position_column_reader.h
new file mode 100644
index 00000000000000..934100317ec4fd
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/row_position_column_reader.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <string>
+
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+class RowPositionColumnReader final : public ParquetColumnReader { // emits file row positions
+public:
+    explicit RowPositionColumnReader(int64_t row_group_first_row,
+                                     ParquetColumnReaderProfile profile = {});
+
+    int file_column_id() const override; // returns format::ROW_POSITION_COLUMN_ID
+    int parquet_leaf_column_id() const override; // always returns -1
+    const DataTypePtr& type() const override;
+    const std::string& name() const override;
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override; // advances the position cursor without emitting rows
+
+private:
+    int64_t _row_group_first_row = 0; // first file row of the current row group
+    int64_t _next_row_position = 0;   // next row position to emit
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/scalar_column_reader.cpp b/be/src/format_v2/parquet/reader/scalar_column_reader.cpp
new file mode 100644
index 00000000000000..3c90279b4412b4
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/scalar_column_reader.cpp
@@ -0,0 +1,315 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+
+#include <parquet/api/reader.h>
+
+#include <algorithm>
+#include <exception>
+#include <utility>
+
+#include "core/column/column.h"
+#include "core/column/column_nullable.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "util/simd/bits.h"
+
+namespace doris::format::parquet {
+namespace {
+
+class ParquetNestedScalarValueCursor { // maps a level entry to its row in values_column
+public:
+    explicit ParquetNestedScalarValueCursor(const ParquetNestedScalarBatch* batch) { reset(batch); }
+
+    void reset(const ParquetNestedScalarBatch* batch) {
+        DORIS_CHECK(batch != nullptr);
+        _batch = batch;
+    }
+
+    Status value_index(const std::string& column_name, int64_t level_idx, int64_t* value_idx) {
+        DORIS_CHECK(_batch != nullptr);
+        DORIS_CHECK(value_idx != nullptr);
+        DORIS_CHECK(level_idx < _batch->levels_written);
+        DORIS_CHECK(level_idx >= 0);
+        DORIS_CHECK(static_cast<size_t>(level_idx) < _batch->value_indices.size());
+        const int64_t resolved_row = _batch->value_indices[static_cast<size_t>(level_idx)];
+        if (resolved_row < 0) { // negative index: no decoded value behind this level entry
+            return Status::Corruption("Nested parquet value is absent for column {}", column_name);
+        }
+        DORIS_CHECK(_batch->values_column.get() != nullptr);
+        if (static_cast<size_t>(resolved_row) >= _batch->values_column->size()) {
+            return Status::Corruption("Nested parquet value index is out of range for column {}",
+                                      column_name);
+        }
+        *value_idx = resolved_row; // the out-parameter is written on success only
+        return Status::OK();
+    }
+
+private:
+    const ParquetNestedScalarBatch* _batch = nullptr;
+};
+
+Status append_scalar_batch_value(const ScalarColumnReader& column_reader,
+                                 const ParquetNestedScalarBatch& batch, int64_t level_idx,
+                                 ParquetNestedScalarValueCursor* value_cursor,
+                                 MutableColumnPtr& column) {
+    // Appends the decoded value behind level entry `level_idx` to `column` (nullable or not).
+    DORIS_CHECK(value_cursor != nullptr);
+    int64_t value_idx = -1;
+    RETURN_IF_ERROR(value_cursor->value_index(column_reader.name(), level_idx, &value_idx));
+    const auto source_row = static_cast<size_t>(value_idx);
+    if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column)) {
+        nullable_column->get_nested_column().insert_from(*batch.values_column, source_row);
+        nullable_column->get_null_map_data().push_back(0); // the appended row is non-null
+    } else {
+        column->insert_from(*batch.values_column, source_row);
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+ScalarColumnReader::ScalarColumnReader(
+        const ParquetColumnSchema& column_schema,
+        std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+        const ParquetPageSkipPlan* page_skip_plan, const cctz::time_zone* timezone,
+        bool enable_strict_mode, ParquetColumnReaderProfile profile)
+        : ParquetColumnReader(column_schema, column_schema.type, profile),
+          _descriptor(column_schema.descriptor),
+          _type_descriptor(column_schema.type_descriptor),
+          _record_reader(std::move(record_reader)),
+          _page_skip_plan(page_skip_plan), // may be null: then no rows count as page-filtered
+          _timezone(timezone),
+          _enable_strict_mode(enable_strict_mode),
+          _nested_batch(std::make_unique<ParquetNestedScalarBatch>()) {} // reused by every load
+
+ScalarColumnReader::~ScalarColumnReader() = default; // defined out of line, members self-clean
+
+Status ScalarColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    if (rows_read == nullptr || column.get() == nullptr) {
+        return Status::InvalidArgument("Invalid parquet column read result pointer for column {}",
+                                       _name);
+    }
+    if (_record_reader == nullptr) {
+        return Status::InternalError("Parquet record reader is not initialized for column {}",
+                                     _name);
+    }
+    auto reader = leaf_reader();
+    ParquetLeafBatch leaf_batch;
+    RETURN_IF_ERROR(reader.read_batch(rows, &leaf_batch, rows_read));
+
+    NullMap null_map;
+    RETURN_IF_ERROR(reader.build_null_map(leaf_batch, *rows_read, &null_map));
+    const auto value_kind = decoded_value_kind(_type_descriptor);
+    const bool non_binary_value = value_kind != DecodedValueKind::BINARY &&
+                                  value_kind != DecodedValueKind::FIXED_BINARY;
+    const bool dense = leaf_batch.read_dense_for_nullable(); // dense: one value per non-null row
+    const int64_t values_written = leaf_batch.values_written();
+    if (non_binary_value && dense && !null_map.empty()) {
+        const auto non_null_count = static_cast<int64_t>(simd::count_zero_num(
+                reinterpret_cast<const int8_t*>(null_map.data()), null_map.size()));
+        if (values_written != non_null_count) {
+            return Status::Corruption(
+                    "Invalid dense nullable parquet record read result for column {}: values={}, "
+                    "records={}, nulls={}",
+                    _name, values_written, *rows_read, *rows_read - non_null_count);
+        }
+    } else if (non_binary_value && !dense && values_written != *rows_read) {
+        return Status::Corruption(
+                "Invalid parquet record read result for column {}: values={}, records={}", _name,
+                values_written, *rows_read);
+    }
+
+    RETURN_IF_ERROR(reader.append_values(leaf_batch, *rows_read, &null_map, column));
+    advance_rows_read(*rows_read);
+    update_reader_read_rows(*rows_read);
+    return Status::OK();
+}
+
+Status ScalarColumnReader::skip_records(int64_t rows) {
+    if (_record_reader == nullptr) {
+        return Status::InternalError("Parquet record reader is not initialized for column {}",
+                                     _name);
+    }
+    if (rows <= 0) {
+        return Status::OK();
+    }
+    // Call SkipRecords() until `rows` records are gone; a call without progress is corruption.
+    try {
+        _record_reader->Reset();
+        for (int64_t remaining = rows; remaining > 0;) {
+            const int64_t skipped = _record_reader->SkipRecords(remaining);
+            if (skipped <= 0) {
+                return Status::Corruption(
+                        "Failed to skip parquet records for column {}: skipped {} of {} rows",
+                        _name, rows - remaining, rows);
+            }
+            remaining -= skipped;
+        }
+    } catch (const ::parquet::ParquetException& e) {
+        return Status::Corruption("Failed to skip parquet records for column {}: {}", _name,
+                                  e.what());
+    } catch (const std::exception& e) {
+        return Status::InternalError("Failed to skip parquet records for column {}: {}", _name,
+                                     e.what());
+    }
+    update_reader_skip_rows(rows);
+    return Status::OK();
+}
+
+int64_t ScalarColumnReader::page_filtered_rows_to_skip(int64_t rows) const {
+    if (rows <= 0 || _page_skip_plan == nullptr) {
+        return 0;
+    }
+    const int64_t window_begin = _row_group_rows_read;
+    const int64_t window_end = window_begin + rows;
+    int64_t filtered_rows = 0;
+    for (const auto& range : _page_skip_plan->skipped_ranges) {
+        const int64_t range_end = range.start + range.length;
+        if (range_end <= window_begin) {
+            continue;
+        }
+        if (range.start >= window_end) {
+            break;
+        }
+        const int64_t start = std::max(range.start, window_begin);
+        const int64_t end = std::min(range_end, window_end);
+        if (start < end) {
+            // Gap skips derive from page-index selected_ranges, so a page-filtered range is either
+            // wholly inside the gap or untouched; partial overlap = planner/scheduler mismatch.
+            DORIS_CHECK(start == range.start);
+            DORIS_CHECK(end == range_end);
+            filtered_rows += end - start;
+        }
+    }
+    return filtered_rows;
+}
+
+void ScalarColumnReader::advance_rows_read(int64_t rows) {
+    DORIS_CHECK(rows >= 0); // the row cursor never moves backwards
+    _row_group_rows_read += rows; // cursor compared against skipped_ranges when skipping
+}
+
+Status ScalarColumnReader::skip(int64_t rows) {
+    if (rows <= 0) {
+        return Status::OK();
+    }
+
+    // Page-filtered rows bypass the record reader; the row cursor still moves by all `rows`.
+    const int64_t page_filtered_rows = page_filtered_rows_to_skip(rows);
+    DORIS_CHECK(page_filtered_rows <= rows);
+    RETURN_IF_ERROR(skip_records(rows - page_filtered_rows));
+    advance_rows_read(rows);
+    return Status::OK();
+}
+
+// A nullable leaf materializes its null slots as well: slot level = leaf definition level - 1.
+Status ScalarColumnReader::load_nested_batch(int64_t rows) {
+    DORIS_CHECK(_nested_batch != nullptr);
+    reset_nested_build_level_cursor();
+    const int nullable_shift = _type->is_nullable() ? 1 : 0;
+    const auto slot_definition_level = static_cast<int16_t>(_definition_level - nullable_shift);
+    RETURN_IF_ERROR(leaf_reader().read_nested_batch(rows, slot_definition_level,
+                                                    _nested_batch.get(), _repetition_level));
+    advance_rows_read(_nested_batch->records_read);
+    update_reader_read_rows(_nested_batch->records_read);
+    return Status::OK();
+}
+
+Status ScalarColumnReader::load_nested_levels_batch(int64_t rows) {
+    DORIS_CHECK(_nested_batch != nullptr);
+    reset_nested_build_level_cursor(); // same reset as load_nested_batch() performs
+    RETURN_IF_ERROR(leaf_reader().read_nested_levels_batch(rows, _nested_batch.get()));
+    advance_rows_read(_nested_batch->records_read); // levels-only reads still consume records
+    update_reader_read_rows(_nested_batch->records_read);
+    return Status::OK();
+}
+
+Status ScalarColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                               int64_t* values_read) {
+    if (values_read == nullptr || column.get() == nullptr) {
+        return Status::InvalidArgument("Invalid parquet nested scalar build result for column {}",
+                                       _name);
+    }
+    DORIS_CHECK(_nested_batch != nullptr);
+    const ParquetNestedScalarBatch& loaded = *_nested_batch;
+    ParquetNestedScalarValueCursor value_cursor(&loaded);
+    const int16_t slot_definition_level = loaded.value_slot_definition_level;
+    *values_read = 0;
+    // Resume at the saved level cursor; emit at most length_upper_bound slots into `column`.
+    int64_t level_idx = nested_build_level_cursor();
+    for (; level_idx < loaded.levels_written && *values_read < length_upper_bound; ++level_idx) {
+        const int16_t def_level = loaded.def_levels[level_idx];
+        const int16_t rep_level = loaded.rep_levels[level_idx];
+        if (def_level < slot_definition_level || rep_level > _repetition_level) {
+            continue;
+        }
+        if (def_level != _definition_level) {
+            if (!_type->is_nullable() && def_level >= _nullable_definition_level) {
+                return Status::Corruption(
+                        "Parquet scalar column {} contains null for non-nullable field", _name);
+            }
+            column->insert_default();
+        } else {
+            RETURN_IF_ERROR(append_scalar_batch_value(*this, loaded, level_idx, &value_cursor,
+                                                      column));
+        }
+        ++*values_read;
+    }
+    set_nested_build_level_cursor(level_idx);
+    return Status::OK();
+}
+
+Status ScalarColumnReader::append_nested_value(int64_t level_idx, MutableColumnPtr& column) const {
+    if (column.get() == nullptr) {
+        return Status::InvalidArgument("Invalid parquet nested scalar append result for column {}",
+                                       _name);
+    }
+    DORIS_CHECK(_nested_batch != nullptr);
+    DORIS_CHECK(level_idx >= 0);
+    DORIS_CHECK(level_idx < _nested_batch->levels_written);
+    // An entry at the leaf definition level carries a value; any other level is a null slot.
+    if (_nested_batch->def_levels[level_idx] == _definition_level) {
+        ParquetNestedScalarValueCursor value_cursor(_nested_batch.get());
+        return append_scalar_batch_value(*this, *_nested_batch, level_idx, &value_cursor, column);
+    }
+    if (_type->is_nullable()) {
+        column->insert_default();
+        return Status::OK();
+    }
+    return Status::Corruption("Parquet MAP column {} contains null for non-nullable value",
+                              _name);
+}
+
+const std::vector<int16_t>& ScalarColumnReader::nested_definition_levels() const {
+    DORIS_CHECK(_nested_batch != nullptr);
+    return _nested_batch->def_levels; // reference into the reader-owned nested batch
+}
+
+const std::vector<int16_t>& ScalarColumnReader::nested_repetition_levels() const {
+    DORIS_CHECK(_nested_batch != nullptr);
+    return _nested_batch->rep_levels; // reference into the reader-owned nested batch
+}
+
+int64_t ScalarColumnReader::nested_levels_written() const {
+    DORIS_CHECK(_nested_batch != nullptr);
+    return _nested_batch->levels_written; // level entries held by the current nested batch
+}
+
+bool ScalarColumnReader::is_or_has_repeated_child() const {
+    return _repetition_level > 0; // true exactly when the repetition level is non-zero
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/scalar_column_reader.h b/be/src/format_v2/parquet/reader/scalar_column_reader.h
new file mode 100644
index 00000000000000..ab7ba0d7e54388
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/scalar_column_reader.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "format_v2/parquet/parquet_type.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/parquet/reader/parquet_leaf_reader.h"
+
+namespace parquet {
+class ColumnDescriptor;
+
+namespace internal {
+class RecordReader;
+} // namespace internal
+} // namespace parquet
+
+namespace cctz {
+class time_zone;
+} // namespace cctz
+
+namespace doris::format::parquet {
+
+struct ScalarColumnReaderTestAccess;
+
+// Leaf column reader. Nested parents drive it via load_nested_batch() / build_nested_column().
+class ScalarColumnReader final : public ParquetColumnReader {
+    friend class MapColumnReader; // presumably for the private append_nested_value()
+    friend struct ScalarColumnReaderTestAccess;
+
+public:
+    ScalarColumnReader(const ParquetColumnSchema& column_schema,
+                       std::shared_ptr<::parquet::internal::RecordReader> record_reader,
+                       const ParquetPageSkipPlan* page_skip_plan = nullptr,
+                       const cctz::time_zone* timezone = nullptr, bool enable_strict_mode = false,
+                       ParquetColumnReaderProfile profile = {});
+    ~ScalarColumnReader() override;
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override;
+
+    Status load_nested_batch(int64_t rows) override;
+    Status load_nested_levels_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override; // _nested_batch levels
+    const std::vector<int16_t>& nested_repetition_levels() const override; // _nested_batch levels
+    int64_t nested_levels_written() const override; // level count held by _nested_batch
+    bool is_or_has_repeated_child() const override; // true when _repetition_level > 0
+
+private:
+    Status append_nested_value(int64_t level_idx, MutableColumnPtr& column) const;
+
+    const ::parquet::ColumnDescriptor* descriptor() const { return _descriptor; }
+
+    ParquetLeafReader leaf_reader() const { // assembled on every call from this reader's state
+        return ParquetLeafReader(_descriptor, _type_descriptor, _type, _name, _record_reader,
+                                 _profile, _timezone, _enable_strict_mode);
+    }
+
+    void advance_rows_read(int64_t rows);
+    Status skip_records(int64_t rows);
+    int64_t page_filtered_rows_to_skip(int64_t rows) const;
+
+    const ::parquet::ColumnDescriptor* _descriptor = nullptr; // Arrow column descriptor
+    ParquetTypeDescriptor _type_descriptor;                   // type encoding information
+    std::shared_ptr<::parquet::internal::RecordReader>
+            _record_reader; // Arrow physical column reader
+    const ParquetPageSkipPlan* _page_skip_plan =
+            nullptr;                            // page-index pruning result (may be nullptr)
+    const cctz::time_zone* _timezone = nullptr; // timezone
+    bool _enable_strict_mode = false;           // strict mode
+    int64_t _row_group_rows_read = 0;           // rows read in the current row group (cursor)
+    std::unique_ptr<ParquetNestedScalarBatch> _nested_batch; // intermediate result for nested reads
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/struct_column_reader.cpp b/be/src/format_v2/parquet/reader/struct_column_reader.cpp
new file mode 100644
index 00000000000000..66e450c567133a
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/struct_column_reader.cpp
@@ -0,0 +1,258 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/struct_column_reader.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "core/column/column_struct.h"
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+
+namespace doris::format::parquet {
+
+ParquetColumnReader* StructColumnReader::shape_source_reader() const {
+    // Pick the child whose level stream describes this struct's rows: the first child that
+    // reports no repetition wins.
+    for (const auto& child : _children) {
+        auto* child_reader = child.get();
+        DORIS_CHECK(child_reader != nullptr);
+        if (!child_reader->is_or_has_repeated_child()) {
+            return child_reader;
+        }
+    }
+    // Every child is repeated, or there is none: fall back to the first child, if any.
+    return _children.empty() ? nullptr : _children[0].get();
+}
+
+Status StructColumnReader::advance_child_past_null_parent(ParquetColumnReader* child_reader,
+                                                          int64_t parent_level_idx) const {
+    DORIS_CHECK(child_reader != nullptr);
+    const int64_t next_child_cursor = parent_level_idx + 1; // first level after the null parent
+    if (auto* scalar_child = dynamic_cast<ScalarColumnReader*>(child_reader)) { // scalar leaf
+        if (next_child_cursor > scalar_child->nested_levels_written()) { // stream too short
+            return Status::Corruption(
+                    "Parquet STRUCT child {} ended before null parent row in column {}",
+                    scalar_child->name(), _name);
+        }
+        scalar_child->set_nested_build_level_cursor(
+                std::max(scalar_child->nested_build_level_cursor(), next_child_cursor));
+        return Status::OK(); // std::max above: the cursor is never moved backwards
+    }
+    if (auto* struct_child = dynamic_cast<StructColumnReader*>(child_reader);
+        struct_child != nullptr && !struct_child->is_or_has_repeated_child()) { // flat struct
+        if (next_child_cursor > struct_child->nested_levels_written()) { // stream too short
+            return Status::Corruption(
+                    "Parquet STRUCT child {} ended before null parent row in column {}",
+                    struct_child->name(), _name);
+        }
+        struct_child->set_nested_build_level_cursor(
+                std::max(struct_child->nested_build_level_cursor(), next_child_cursor));
+        for (auto& grandchild : struct_child->_children) { // same skip for its own children
+            RETURN_IF_ERROR(struct_child->advance_child_past_null_parent(grandchild.get(),
+                                                                         parent_level_idx));
+        }
+        return Status::OK();
+    }
+
+    int64_t child_cursor = child_reader->nested_build_level_cursor(); // all other child kinds
+    const auto& child_rep_levels = child_reader->nested_repetition_levels();
+    const int64_t child_levels_written = child_reader->nested_levels_written();
+    while (child_cursor < child_levels_written) {
+        const int16_t child_rep_level = child_rep_levels[child_cursor];
+        ++child_cursor; // the level just inspected is consumed before the stop test
+        if (!child_reader->is_or_has_repeated_child() || child_rep_level <= _repetition_level) {
+            break;
+        }
+    }
+    child_reader->set_nested_build_level_cursor(child_cursor);
+    return Status::OK();
+}
+
+Status StructColumnReader::read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) {
+    RETURN_IF_ERROR(load_nested_batch(rows));            // load every child's nested batch
+    return build_nested_column(rows, column, rows_read); // then assemble up to `rows` rows
+}
+
+Status StructColumnReader::skip(int64_t rows) {
+    if (rows <= 0) { // nothing to skip
+        return Status::OK();
+    }
+    auto scratch_column = _type->create_column(); // throwaway destination for the skipped rows
+    RETURN_IF_ERROR(load_nested_batch(rows));
+    int64_t rows_read = 0;
+    RETURN_IF_ERROR(build_nested_column(rows, scratch_column, &rows_read));
+    if (rows_read != rows) { // building fewer rows than requested is reported as corruption
+        return Status::Corruption("Failed to skip parquet STRUCT column {}: skipped {} of {} rows",
+                                  _name, rows_read, rows);
+    }
+    update_reader_skip_rows(rows);
+    return Status::OK();
+}
+
+Status StructColumnReader::load_nested_batch(int64_t rows) {
+    reset_nested_build_level_cursor(); // resets this reader's own build cursor (base helper)
+    for (auto& child_reader : _children) {
+        DORIS_CHECK(child_reader != nullptr);
+        RETURN_IF_ERROR(child_reader->load_nested_batch(rows)); // first failure aborts the load
+    }
+    return Status::OK();
+}
+
+Status StructColumnReader::build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                                               int64_t* values_read) {
+    if (column.get() == nullptr || values_read == nullptr) {
+        return Status::InvalidArgument("Invalid parquet struct build result pointer for column {}",
+                                       _name);
+    }
+    if (_children.empty()) { // no child readers: only grow the output by the requested length
+        column->resize(column->size() + static_cast<size_t>(length_upper_bound));
+        *values_read = length_upper_bound;
+        return Status::OK();
+    }
+    auto* struct_column = struct_column_from_output(column);
+    DORIS_CHECK(struct_column != nullptr);
+    auto* parent_null_map = null_map_from_nullable_output(column); // may be nullptr, see below
+    auto* shape_reader = shape_source_reader();
+    DORIS_CHECK(shape_reader != nullptr);
+    const auto& def_levels = shape_reader->nested_definition_levels();
+    const auto& rep_levels = shape_reader->nested_repetition_levels();
+    const int64_t levels_written = shape_reader->nested_levels_written();
+
+    NullMap parent_nulls;                      // null flag of every struct row emitted here
+    std::vector<int64_t> parent_level_indices; // shape-stream level index of every such row
+    *values_read = 0;
+    int64_t level_idx = nested_build_level_cursor(); // continue from the saved cursor
+    while (level_idx < levels_written) {
+        const int64_t current_level_idx = level_idx;
+        const int16_t def_level = def_levels[level_idx];
+        const int16_t rep_level = rep_levels[level_idx];
+        const bool starts_parent =
+                !shape_reader->is_or_has_repeated_child() || rep_level <= _repetition_level;
+        if (starts_parent && *values_read >= length_upper_bound) { // row budget is used up
+            break;
+        }
+        ++level_idx;
+        if (def_level < _repeated_ancestor_definition_level) { // level yields no struct row
+            continue;
+        }
+        if (shape_reader->is_or_has_repeated_child() && rep_level > _repetition_level) {
+            continue; // not the first level of a parent row
+        }
+        const bool parent_is_null = def_level < _nullable_definition_level;
+        if (parent_is_null && parent_null_map == nullptr) {
+            return Status::Corruption(
+                    "Parquet STRUCT column {} contains null for non-nullable struct", _name);
+        }
+        parent_nulls.push_back(parent_is_null);
+        parent_level_indices.push_back(current_level_idx);
+        ++*values_read;
+    }
+    set_nested_build_level_cursor(level_idx); // save progress for the next build call
+
+    std::vector<MutableColumnPtr> child_columns; // mutable handles of all struct sub-columns
+    child_columns.reserve(struct_column->get_columns().size());
+    for (size_t child_idx = 0; child_idx < struct_column->get_columns().size(); ++child_idx) {
+        child_columns.push_back(struct_column->get_column_ptr(child_idx)->assert_mutable());
+    }
+    for (size_t child_idx = 0; child_idx < _children.size(); ++child_idx) {
+        const int output_idx = _child_output_indices[child_idx];
+        if (output_idx < 0) { // negative index: this child is not written to the output
+            continue;
+        }
+        // STRUCT owns row alignment. Child readers consume only present parent rows from their
+        // level streams; null STRUCT parents become default placeholders in every child column.
+        // This mirrors Arrow's separation between struct validity and child array materialization,
+        // and avoids asking scalar/list/map children to invent values for an absent parent.
+        int64_t pending_present_rows = 0; // run of non-null parents not yet built
+        int64_t total_child_rows = 0;     // rows appended to this child column so far
+        auto flush_present_rows = [&]() -> Status {
+            if (pending_present_rows == 0) {
+                return Status::OK();
+            }
+            int64_t child_rows = 0;
+            RETURN_IF_ERROR(_children[child_idx]->build_nested_column(
+                    pending_present_rows, child_columns[output_idx], &child_rows));
+            if (child_rows != pending_present_rows) { // child must build exactly the run length
+                return Status::Corruption(
+                        "Parquet STRUCT child {} built {} rows, expected {} for column {}",
+                        _children[child_idx]->name(), child_rows, pending_present_rows, _name);
+            }
+            total_child_rows += child_rows;
+            pending_present_rows = 0;
+            return Status::OK();
+        };
+        for (size_t parent_idx = 0; parent_idx < parent_nulls.size(); ++parent_idx) {
+            const auto parent_is_null = parent_nulls[parent_idx];
+            if (!parent_is_null) {
+                ++pending_present_rows;
+                continue;
+            }
+            RETURN_IF_ERROR(flush_present_rows());
+            child_columns[output_idx]->insert_default(); // placeholder under the null parent
+            RETURN_IF_ERROR(advance_child_past_null_parent(_children[child_idx].get(),
+                                                           parent_level_indices[parent_idx]));
+            ++total_child_rows;
+        }
+        RETURN_IF_ERROR(flush_present_rows()); // trailing run of non-null parents
+        if (total_child_rows != *values_read) {
+            return Status::Corruption(
+                    "Parquet STRUCT child {} built {} rows, expected {} for column {}",
+                    _children[child_idx]->name(), total_child_rows, *values_read, _name);
+        }
+    }
+    for (size_t child_idx = 0; child_idx < child_columns.size(); ++child_idx) {
+        struct_column->get_column_ptr(child_idx) = std::move(child_columns[child_idx]);
+    }
+    append_parent_nulls(parent_null_map, parent_nulls);
+    return Status::OK();
+}
+
+const std::vector<int16_t>& StructColumnReader::nested_definition_levels() const {
+    auto* shape_reader = shape_source_reader(); // levels come from the shape-source child
+    DORIS_CHECK(shape_reader != nullptr);       // i.e. the struct must have a child reader
+    return shape_reader->nested_definition_levels();
+}
+
+const std::vector<int16_t>& StructColumnReader::nested_repetition_levels() const {
+    auto* shape_reader = shape_source_reader(); // levels come from the shape-source child
+    DORIS_CHECK(shape_reader != nullptr);       // i.e. the struct must have a child reader
+    return shape_reader->nested_repetition_levels();
+}
+
+int64_t StructColumnReader::nested_levels_written() const {
+    auto* shape_reader = shape_source_reader(); // count comes from the shape-source child
+    DORIS_CHECK(shape_reader != nullptr);       // i.e. the struct must have a child reader
+    return shape_reader->nested_levels_written();
+}
+
+bool StructColumnReader::is_or_has_repeated_child() const {
+    auto* shape_reader = shape_source_reader(); // nullptr when there are no children
+    return shape_reader != nullptr && shape_reader->is_or_has_repeated_child();
+}
+
+void StructColumnReader::advance_nested_build_level_cursor_past_parent(
+        int16_t parent_repetition_level) {
+    ParquetColumnReader::advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    for (auto& child : _children) { // then forward the same request to every child reader
+        DORIS_CHECK(child != nullptr);
+        child->advance_nested_build_level_cursor_past_parent(parent_repetition_level);
+    }
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/reader/struct_column_reader.h b/be/src/format_v2/parquet/reader/struct_column_reader.h
new file mode 100644
index 00000000000000..3e88b75cede3d9
--- /dev/null
+++ b/be/src/format_v2/parquet/reader/struct_column_reader.h
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+
+class StructColumnReader final : public ParquetColumnReader {
+public:
+    StructColumnReader(const ParquetColumnSchema& schema, DataTypePtr type,
+                       std::vector<std::unique_ptr<ParquetColumnReader>> children,
+                       std::vector<int> child_output_indices,
+                       ParquetColumnReaderProfile profile = {})
+            : ParquetColumnReader(schema, type, profile),
+              _children(std::move(children)),
+              _child_output_indices(std::move(child_output_indices)) {
+        DCHECK_EQ(_children.size(), _child_output_indices.size()); // one output index per child
+    }
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override;
+    Status skip(int64_t rows) override; // builds the rows into a scratch column and drops it
+    Status load_nested_batch(int64_t rows) override; // loads the nested batch of every child
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override; // from shape source
+    const std::vector<int16_t>& nested_repetition_levels() const override; // from shape source
+    int64_t nested_levels_written() const override;                        // from shape source
+    bool is_or_has_repeated_child() const override; // false when there are no children
+    void advance_nested_build_level_cursor_past_parent(int16_t parent_repetition_level) override;
+
+private:
+    ParquetColumnReader* shape_source_reader() const; // child whose levels shape this struct
+    Status advance_child_past_null_parent(ParquetColumnReader* child_reader,
+                                          int64_t parent_level_idx) const;
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> _children; // projected child readers
+    std::vector<int> _child_output_indices; // child reader -> struct output position mapping
+};
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/parquet/selection_vector.h b/be/src/format_v2/parquet/selection_vector.h
new file mode 100644
index 00000000000000..589154d4acc0e4
--- /dev/null
+++ b/be/src/format_v2/parquet/selection_vector.h
@@ -0,0 +1,235 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//   http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "common/check.h"
+#include "common/status.h"
+
+namespace doris::format::parquet {
+
+// A contiguous run of rows: `length` rows beginning at row `start`.
+struct RowRange {
+    int64_t start = 0;
+    int64_t length = 0;
+};
+
+// Skipped data pages of one leaf column, together with the row ranges those pages cover.
+struct ParquetPageSkipPlan {
+    int leaf_column_id = -1;
+    // Page ordinal is the data-page ordinal in the column chunk. It intentionally excludes
+    // dictionary pages, matching Arrow PageReader::set_data_page_filter().
+    std::vector<uint8_t> skipped_pages;
+    std::vector<int64_t> skipped_page_compressed_sizes;
+    // Row ranges covered by skipped data pages. ScalarColumnReader uses these ranges to avoid
+    // calling RecordReader::SkipRecords() again for pages already skipped by Arrow.
+    std::vector<RowRange> skipped_ranges;
+
+    // True when no skipped row range is recorded.
+    bool empty() const { return skipped_ranges.empty(); }
+
+    // Page ordinals beyond `skipped_pages` are reported as not skipped.
+    bool should_skip_page(size_t page_idx) const {
+        return page_idx < skipped_pages.size() && skipped_pages[page_idx] != 0;
+    }
+
+    // `page_idx` must be a valid ordinal; that is only verified by a DCHECK.
+    int64_t skipped_page_compressed_size(size_t page_idx) const {
+        DCHECK_LT(page_idx, skipped_page_compressed_sizes.size());
+        return skipped_page_compressed_sizes[page_idx];
+    }
+};
+
+// Selected row positions of one batch; verify() checks that they are strictly increasing.
+//
+// The indices live either in storage owned by this object (after resize()) or in a caller
+// buffer that is only borrowed (after initialize()). While no buffer is set, get_index() is
+// the identity mapping.
+//
+// Copy and move are written out because `_data` may point into `_owned`: a defaulted copy
+// would leave the new object aliasing the source's buffer, which dangles once the source is
+// resized or destroyed. An owning source is deep-copied, a borrowing source is copied as a
+// view of the same caller buffer, and a moved-from vector is left cleared.
+class SelectionVector {
+public:
+    using Index = uint16_t;
+
+    SelectionVector() = default;
+
+    // Owns `count` indices initialized to the identity selection 0..count-1.
+    explicit SelectionVector(size_t count) { resize(count); }
+
+    // Borrows `count` indices from `data`; the caller keeps that buffer alive.
+    SelectionVector(Index* data, size_t count) { initialize(data, count); }
+
+    SelectionVector(const SelectionVector& other) { copy_from(other); }
+
+    SelectionVector(SelectionVector&& other) noexcept { move_from(other); }
+
+    SelectionVector& operator=(const SelectionVector& other) {
+        if (this != &other) {
+            copy_from(other);
+        }
+        return *this;
+    }
+
+    SelectionVector& operator=(SelectionVector&& other) noexcept {
+        if (this != &other) {
+            move_from(other);
+        }
+        return *this;
+    }
+
+    ~SelectionVector() = default;
+
+    // Switches to a borrowed buffer and drops any owned indices.
+    void initialize(Index* data, size_t count) {
+        _owned.clear();
+        _data = data;
+        _size = count;
+    }
+
+    // Switches to owned storage holding the identity selection 0..count-1. Positions are
+    // narrowed to Index, so `count` is expected to fit the Index value range.
+    void resize(size_t count) {
+        _owned.resize(count);
+        _data = _owned.data();
+        _size = count;
+        for (size_t idx = 0; idx < count; ++idx) {
+            _data[idx] = static_cast<Index>(idx);
+        }
+    }
+
+    // Returns to the unset state.
+    void clear() {
+        _owned.clear();
+        _data = nullptr;
+        _size = 0;
+    }
+
+    size_t size() const { return _size; }
+
+    bool is_set() const { return _data != nullptr; }
+
+    Index* data() { return _data; }
+
+    const Index* data() const { return _data; }
+
+    // Unchecked access; yields `idx` itself while no buffer is set.
+    size_t get_index(size_t idx) const {
+        if (_data == nullptr) {
+            return idx;
+        }
+        return _data[idx];
+    }
+
+    // Unchecked write; a buffer must be set.
+    void set_index(size_t idx, Index value) { _data[idx] = value; }
+
+    // Checks that the first `count` entries are strictly increasing positions in
+    // [0, batch_rows) and that `count` exceeds neither `batch_rows` nor a set buffer's size.
+    Status verify(size_t count, int64_t batch_rows) const {
+        if (batch_rows < 0) {
+            return Status::InvalidArgument("Negative parquet selection batch rows {}", batch_rows);
+        }
+        if (std::cmp_greater(count, batch_rows)) {
+            return Status::InvalidArgument("Parquet selection count {} exceeds batch rows {}",
+                                           count, batch_rows);
+        }
+        if (_data != nullptr && count > _size) {
+            return Status::InvalidArgument("Parquet selection count {} exceeds vector size {}",
+                                           count, _size);
+        }
+        size_t previous = 0;
+        for (size_t idx = 0; idx < count; ++idx) {
+            const size_t current = get_index(idx);
+            if (std::cmp_greater_equal(current, batch_rows)) {
+                return Status::InvalidArgument(
+                        "Parquet selection index {} out of range [0, {}) at position {}", current,
+                        batch_rows, idx);
+            }
+            if (idx > 0 && current <= previous) {
+                return Status::InvalidArgument(
+                        "Parquet selection index {} is not strictly greater than previous {} at "
+                        "position {}",
+                        current, previous, idx);
+            }
+            previous = current;
+        }
+        return Status::OK();
+    }
+
+private:
+    // True when `_data` points at this object's own storage rather than a borrowed buffer.
+    bool owns_data() const { return _data != nullptr && _data == _owned.data(); }
+
+    // Precondition: this != &other.
+    void copy_from(const SelectionVector& other) {
+        if (other.owns_data()) {
+            _owned.assign(other._data, other._data + other._size);
+            _data = _owned.data();
+        } else {
+            _owned.clear();
+            _data = other._data;
+        }
+        _size = other._size;
+    }
+
+    // Precondition: this != &other. Moving a std::vector hands over its heap buffer, so an
+    // owning source's indices stay at the same address; only the bookkeeping is transferred.
+    void move_from(SelectionVector& other) noexcept {
+        const bool owned = other.owns_data();
+        _owned = std::move(other._owned);
+        _data = owned ? _owned.data() : other._data;
+        _size = other._size;
+        other.clear();
+    }
+
+    std::vector<Index> _owned;
+    Index* _data = nullptr;
+    size_t _size = 0;
+};
+
+// Collapses the first `selected_rows` selection entries into maximal runs of consecutive
+// positions. Entries are read unchecked, so the selection must hold at least that many.
+inline std::vector<RowRange> selection_to_ranges(const SelectionVector& selection,
+                                                 uint16_t selected_rows) {
+    std::vector<RowRange> ranges;
+    if (selected_rows == 0) {
+        return ranges;
+    }
+
+    int64_t range_start = selection.get_index(0);
+    int64_t previous = selection.get_index(0);
+    for (uint16_t selection_idx = 1; selection_idx < selected_rows; ++selection_idx) {
+        const int64_t current = selection.get_index(selection_idx);
+        if (current == previous + 1) {
+            previous = current;
+            continue;
+        }
+        ranges.push_back(RowRange {.start = range_start, .length = previous - range_start + 1});
+        range_start = current;
+        previous = current;
+    }
+    ranges.push_back(RowRange {.start = range_start, .length = previous - range_start + 1});
+    return ranges;
+}
+
+} // namespace doris::format::parquet
diff --git a/be/src/format_v2/schema_projection.cpp b/be/src/format_v2/schema_projection.cpp
new file mode 100644
index 00000000000000..342f4c91898c92
--- /dev/null
+++ b/be/src/format_v2/schema_projection.cpp
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/schema_projection.h"
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "core/assert_cast.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+
+namespace doris::format {
+namespace {
+
+// Rebuild the complex DataType for one already-pruned semantic ColumnDefinition node.
+//
+// The caller has already matched the projection against ColumnDefinition::children and preserved
+// the file-local child order. This helper only mirrors those projected semantic children into the
+// node type. It intentionally does not understand physical format wrappers. In particular, a MAP
+// node is expected to have semantic children [key, value], even if the underlying format stores a
+// wrapper such as Parquet key_value/entry.
+Status rebuild_semantic_projected_type(const DataTypePtr& original_type,
+                                       const std::vector<ColumnDefinition>& projected_children,
+                                       DataTypePtr* projected_type) {
+    DORIS_CHECK(original_type != nullptr);
+    DORIS_CHECK(projected_type != nullptr);
+
+    // The nullable wrapper is peeled off once here and put back around the rebuilt type below.
+    const auto unwrapped_type = remove_nullable(original_type);
+    DataTypePtr rebuilt_type;
+    switch (unwrapped_type->get_primitive_type()) {
+    case TYPE_STRUCT: {
+        // One struct field per projected child, in the order the caller supplied them.
+        DataTypes field_types;
+        Strings field_names;
+        field_types.reserve(projected_children.size());
+        field_names.reserve(projected_children.size());
+        for (const auto& field : projected_children) {
+            field_types.push_back(field.type);
+            field_names.push_back(field.name);
+        }
+        rebuilt_type = std::make_shared<DataTypeStruct>(field_types, field_names);
+        break;
+    }
+    case TYPE_ARRAY:
+        DORIS_CHECK(projected_children.size() == 1);
+        rebuilt_type = std::make_shared<DataTypeArray>(projected_children[0].type);
+        break;
+    case TYPE_MAP: {
+        const auto* map_type = assert_cast<const DataTypeMap*>(unwrapped_type.get());
+        DataTypePtr value_type;
+        for (const auto& child : projected_children) {
+            // A partial MAP projection may only prune the value subtree. The key stream has to
+            // stay complete because it defines entry existence and offsets when ColumnMap is
+            // materialized, so the rebuilt DataTypeMap reuses the original key type rather than
+            // a child's type. A key child in the semantic child list is therefore ignored; only
+            // a value child determines the projected value shape.
+            const bool is_key = child.file_local_id() == 0 || child.name == "key";
+            if (!is_key && (child.file_local_id() == 1 || child.name == "value")) {
+                value_type = child.type;
+            }
+        }
+        if (value_type == nullptr) {
+            return Status::NotSupported("MAP projection for type {} contains no value child",
+                                        original_type->get_name());
+        }
+        rebuilt_type = std::make_shared<DataTypeMap>(map_type->get_key_type(), value_type);
+        break;
+    }
+    default:
+        return Status::InvalidArgument("Cannot project children from non-complex type {}",
+                                       original_type->get_name());
+    }
+
+    if (original_type->is_nullable()) {
+        *projected_type = make_nullable(rebuilt_type);
+    } else {
+        *projected_type = rebuilt_type;
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+Status project_column_definition(const ColumnDefinition& field, const LocalColumnIndex& projection,
+                                 ColumnDefinition* projected_field) {
+    if (projected_field == nullptr) {
+        return Status::InvalidArgument("projected_field is null");
+    }
+    *projected_field = field;
+    const bool keep_all = projection.project_all_children || projection.children.empty();
+    if (keep_all) {
+        return Status::OK();
+    }
+
+    projected_field->children.clear();
+    // Validate every requested child id up front, so a bad id fails before any recursion.
+    for (const auto& wanted : projection.children) {
+        if (wanted.local_id() == -1) {
+            return Status::InvalidArgument("Empty projection path for field {}", field.name);
+        }
+        const bool known = std::ranges::any_of(field.children, [&](const ColumnDefinition& c) {
+            return c.file_local_id() == wanted.local_id();
+        });
+        if (!known) {
+            return Status::InvalidArgument("Invalid projection child id {} for field {}",
+                                           wanted.local_id(), field.name);
+        }
+    }
+    // Emit the surviving children in the file's child order, not in projection order.
+    for (const auto& child : field.children) {
+        const auto match = std::ranges::find_if(
+                projection.children,
+                [&](const LocalColumnIndex& p) { return p.local_id() == child.file_local_id(); });
+        if (match != projection.children.end()) {
+            ColumnDefinition pruned_child;
+            RETURN_IF_ERROR(project_column_definition(child, *match, &pruned_child));
+            projected_field->children.push_back(std::move(pruned_child));
+        }
+    }
+    if (projected_field->children.empty()) {
+        return Status::NotSupported("Projection for field {} contains no children", field.name);
+    }
+
+    return rebuild_semantic_projected_type(field.type, projected_field->children,
+                                           &projected_field->type);
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/schema_projection.h b/be/src/format_v2/schema_projection.h
new file mode 100644
index 00000000000000..c2125d66931631
--- /dev/null
+++ b/be/src/format_v2/schema_projection.h
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "format_v2/file_reader.h"
+
+namespace doris::format {
+
+// Build a projected file-local semantic schema node from a full schema node and a nested
+// LocalColumnIndex projection.
+//
+// This module is deliberately about semantic ColumnDefinition trees, not physical file-format
+// trees. FileReader::get_schema() returns file-local columns after type conversion to Doris
+// DataType, and their children must follow Doris semantics:
+//
+//   STRUCT children = fields
+//   ARRAY children = [element]
+//   MAP children = [key, value]
+//
+// Format-specific wrappers, such as Parquet MAP key_value/entry nodes, are intentionally hidden
+// from this API. A format reader that needs those wrappers for its physical reader tree should
+// translate the semantic projection back to its physical layout internally.
+//
+// The function does three things:
+// - Copies `field` metadata to `projected_field`.
+// - Recursively prunes children according to `projection.children`, matching children by
+//   ColumnDefinition::file_local_id() rather than vector ordinal. The root projection id is not
+//   interpreted here because the caller has already selected `field`.
+// - Rebuilds the node DataType from the projected semantic children so the returned definition is
+//   self-consistent. STRUCT uses projected child names/types, ARRAY uses the projected element
+//   type, and MAP preserves the original key type while rebuilding the projected value type.
+//
+// A full projection copies `field` unchanged. Partial MAP projection only uses the value child for
+// type rebuilding. MAP is materialized as offsets + keys + values, so the reader must still read
+// the complete key stream to build entry shape and offsets. If the semantic projection includes
+// the key child, it is ignored here; key-only MAP projections are rejected because they do not
+// define a value shape.
+Status project_column_definition(const ColumnDefinition& field, const LocalColumnIndex& projection,
+                                 ColumnDefinition* projected_field);
+
+} // namespace doris::format
diff --git a/be/src/format_v2/table/hive_reader.cpp b/be/src/format_v2/table/hive_reader.cpp
new file mode 100644
index 00000000000000..71a5a7ad57cfd9
--- /dev/null
+++ b/be/src/format_v2/table/hive_reader.cpp
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/hive_reader.h"
+
+#include "common/consts.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/file_reader.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format::hive {
+namespace {
+
+// Effective file format of the split: a range-level format_type overrides the scan-level one.
+TFileFormatType::type format_type_from_context(const format::ProjectedColumnBuildContext& context) {
+    DORIS_CHECK(context.scan_params != nullptr);
+    const auto& range = context.range;
+    const bool range_has_format = range != nullptr && range->__isset.format_type;
+    return range_has_format ? range->format_type : context.scan_params->format_type;
+}
+
+// Positional column binding applies only to Parquet splits read with
+// hive_parquet_use_column_names disabled; a context without runtime/scan params never qualifies.
+bool use_column_position_mapping(const format::ProjectedColumnBuildContext& context) {
+    if (context.runtime_state == nullptr || context.scan_params == nullptr) {
+        return false;
+    }
+    if (format_type_from_context(context) != TFileFormatType::FORMAT_PARQUET) {
+        return false;
+    }
+    return !context.runtime_state->query_options().hive_parquet_use_column_names;
+}
+
+// Row-id columns never bind by file position. Otherwise an explicit is_file_slot flag decides;
+// without it, every slot not categorized as a partition key counts as a file column.
+bool is_file_column_position_slot(const TFileScanSlotInfo& slot_info,
+                                  const std::string& column_name) {
+    const bool is_rowid = column_name.starts_with(BeConsts::GLOBAL_ROWID_COL) ||
+                          column_name == BeConsts::ICEBERG_ROWID_COL;
+    const bool is_partition = slot_info.__isset.category &&
+                              slot_info.category == TColumnCategory::PARTITION_KEY;
+    const bool is_file = slot_info.__isset.is_file_slot ? slot_info.is_file_slot : !is_partition;
+    return !is_rowid && is_file;
+}
+
+} // namespace
+
+Status HiveReader::prepare_split(const format::SplitReadOptions& options) {
+    if (options.current_split_format == _format) { // formats agree: let the base class proceed
+        return format::TableReader::prepare_split(options);
+    }
+    return Status::InternalError(
+            "Hive scan expects all splits to use the same file format, "
+            "initialized_format={}, current_split_format={}",
+            static_cast<int>(_format), static_cast<int>(options.current_split_format));
+}
+
+format::TableColumnMappingMode HiveReader::mapping_mode() const {
+    // Hive picks the table-to-file column matching strategy from the file format plus the
+    // corresponding session variable:
+    //   - ORC     -> hive_orc_use_column_names
+    //   - PARQUET -> hive_parquet_use_column_names
+    // A true value selects BY_NAME (modern Hive default); false selects BY_INDEX, i.e. matching
+    // by top-level position (mainly for Hive1 ORC files whose columns are `_col0` / `_col1`;
+    // Parquet exposes the same switch for consistency).
+    DORIS_CHECK(_runtime_state != nullptr);
+    const auto& session_options = _runtime_state->query_options();
+    bool match_by_name = true;
+    switch (_format) {
+    case format::FileFormat::ORC:
+        match_by_name = session_options.hive_orc_use_column_names;
+        break;
+    case format::FileFormat::PARQUET:
+        match_by_name = session_options.hive_parquet_use_column_names;
+        break;
+    case format::FileFormat::CSV:
+    case format::FileFormat::TEXT:
+    case format::FileFormat::JSON:
+        // These formats carry no embedded column names or field ids, so their file-local
+        // schema is synthesized from FE-provided file slots and matched by table column name.
+        break;
+    default:
+        DORIS_CHECK(false) << "HiveReader does not support this file reader format";
+    }
+    return match_by_name ? format::TableColumnMappingMode::BY_NAME
+                         : format::TableColumnMappingMode::BY_INDEX;
+}
+
+Status HiveReader::annotate_projected_column(const TFileScanSlotInfo& slot_info,
+                                             format::ProjectedColumnBuildContext* context,
+                                             format::ColumnDefinition* column) const {
+    DORIS_CHECK(context != nullptr); // validated before being handed to the base implementation
+    DORIS_CHECK(column != nullptr);
+    RETURN_IF_ERROR(format::TableReader::annotate_projected_column(slot_info, context, column));
+    if (!use_column_position_mapping(*context) ||
+        !is_file_column_position_slot(slot_info, column->name)) {
+        return Status::OK(); // name-based mapping or a non-file slot: nothing to bind
+    }
+    const auto* scan_params = context->scan_params;
+    DORIS_CHECK(scan_params != nullptr);
+    if (!scan_params->__isset.column_idxs ||
+        context->next_file_column_idx >= scan_params->column_idxs.size()) {
+        return Status::InvalidArgument(
+                "Hive positional column mapping is missing file index for column '{}', "
+                "required file slot ordinal={}, column_idxs_size={}",
+                column->name, context->next_file_column_idx,
+                scan_params->__isset.column_idxs ? scan_params->column_idxs.size() : 0);
+    }
+    const auto file_index = scan_params->column_idxs[context->next_file_column_idx];
+    if (file_index < 0) {
+        return Status::InvalidArgument(
+                "Hive positional column mapping has negative file index {} for column '{}'",
+                file_index, column->name);
+    }
+    column->identifier = Field::create_field<TYPE_INT>(file_index); // bind by file position
+    ++context->next_file_column_idx; // the next file slot consumes the next column_idxs entry
+    return Status::OK();
+}
+
+Status HiveReader::validate_projected_columns(
+        const format::ProjectedColumnBuildContext& context) const {
+    if (use_column_position_mapping(context)) {
+        DORIS_CHECK(context.scan_params != nullptr);
+        const auto& scan = *context.scan_params;
+        // column_idxs, when present, must be fully consumed by annotate_projected_column().
+        if (scan.__isset.column_idxs && context.next_file_column_idx != scan.column_idxs.size()) {
+            return Status::InvalidArgument(
+                    "Hive positional column mapping has unused file indexes: consumed={}, "
+                    "column_idxs_size={}",
+                    context.next_file_column_idx, scan.column_idxs.size());
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::hive
diff --git a/be/src/format_v2/table/hive_reader.h b/be/src/format_v2/table/hive_reader.h
new file mode 100644
index 00000000000000..50d21c663cc542
--- /dev/null
+++ b/be/src/format_v2/table/hive_reader.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "format_v2/table_reader.h"
+
+namespace doris::format::hive {
+// Hive itself tolerates ORC and Parquet files mixed in one table only when they live in
+// different partitions; mixing them inside a single partition already fails on the Hive side.
+// FE plans one file format for all files of a scan, and BE cannot handle mixed formats either.
+class HiveReader final : public format::TableReader {
+public:
+    ENABLE_FACTORY_CREATOR(HiveReader);
+    ~HiveReader() final = default;
+
+    Status prepare_split(const format::SplitReadOptions& options) override;
+    format::TableColumnMappingMode mapping_mode() const override;
+    Status annotate_projected_column(const TFileScanSlotInfo& slot_info,
+                                     format::ProjectedColumnBuildContext* context,
+                                     format::ColumnDefinition* column) const override;
+    Status validate_projected_columns(
+            const format::ProjectedColumnBuildContext& context) const override;
+};
+
+} // namespace doris::format::hive
diff --git a/be/src/format_v2/table/hudi_reader.cpp b/be/src/format_v2/table/hudi_reader.cpp
new file mode 100644
index 00000000000000..d76be201067bd7
--- /dev/null
+++ b/be/src/format_v2/table/hudi_reader.cpp
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/hudi_reader.h"
+
+#include <utility>
+
+#include "exprs/vexpr_context.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/jni/hudi_jni_reader.h"
+#include "format_v2/table/schema_history_util.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::hudi {
+
+Status HudiReader::prepare_split(const format::SplitReadOptions& options) {
+    // Track the schema id carried by this split's Hudi params; -1 when the split has none.
+    const auto& table_params = options.current_range.table_format_params;
+    const bool has_schema_id = options.current_range.__isset.table_format_params &&
+                               table_params.__isset.hudi_params &&
+                               table_params.hudi_params.__isset.schema_id;
+    _split_schema_id = has_schema_id ? table_params.hudi_params.schema_id : -1;
+    return format::TableReader::prepare_split(options);
+}
+
+format::TableColumnMappingMode HudiReader::mapping_mode() const {
+    using Mode = format::TableColumnMappingMode; // BY_FIELD_ID requires a mappable history schema
+    const bool by_field_id = format::can_map_by_history_schema(_scan_params, _split_schema_id);
+    return by_field_id ? Mode::BY_FIELD_ID : Mode::BY_NAME;
+}
+
+Status HudiReader::annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
+    DORIS_CHECK(file_schema != nullptr);
+    if (mapping_mode() != format::TableColumnMappingMode::BY_FIELD_ID) {
+        return Status::OK(); // name-based mapping: the file schema is left as-is
+    }
+    return format::annotate_file_schema_from_history(_scan_params, _split_schema_id, file_schema);
+}
+
+Status HudiHybridReader::init(format::TableReadOptions&& options) {
+    return format::TableReader::init(std::move(options)); // child readers are created lazily
+}
+
+Status HudiHybridReader::prepare_split(const format::SplitReadOptions& options) {
+    RETURN_IF_ERROR(_ensure_current_split_reader(options)); // picks the JNI or native child
+    DORIS_CHECK(_current_split_reader != nullptr);
+    return _current_split_reader->prepare_split(options); // the child handles the split itself
+}
+
+Status HudiHybridReader::get_block(Block* block, bool* eos) {
+    DORIS_CHECK(_current_split_reader != nullptr); // set by a successful prepare_split()
+    return _current_split_reader->get_block(block, eos);
+}
+
+Status HudiHybridReader::close() {
+    // Both children are always closed, even if the first close() fails; the first error
+    // encountered (native before JNI) is the one reported.
+    Status first_error = Status::OK();
+    for (format::TableReader* child : {_native_reader.get(), _jni_reader.get()}) {
+        Status child_status = child != nullptr ? child->close() : Status::OK();
+        if (first_error.ok() && !child_status.ok()) {
+            first_error = std::move(child_status);
+        }
+    }
+    // The children stay allocated; only the per-split dispatch pointer is cleared.
+    _current_split_reader = nullptr;
+    return first_error;
+}
+
+Status HudiHybridReader::_ensure_current_split_reader(const format::SplitReadOptions& options) {
+    DORIS_CHECK(_scan_params != nullptr);
+    if (_is_jni_split(*_scan_params, options.current_range)) {
+        if (_jni_reader == nullptr) { // created lazily by the first JNI split
+            _jni_reader = std::make_unique<format::hudi::HudiJniReader>();
+            RETURN_IF_ERROR(_init_child_reader(_jni_reader.get(), format::FileFormat::JNI));
+        }
+        _current_split_reader = _jni_reader.get();
+        return Status::OK();
+    }
+    format::FileFormat native_format; // resolved and validated for every native split
+    RETURN_IF_ERROR(_to_file_format(*_scan_params, options.current_range, &native_format));
+    if (_native_reader == nullptr) { // created lazily by the first native split
+        _native_reader = format::hudi::HudiReader::create_unique();
+        RETURN_IF_ERROR(_init_child_reader(_native_reader.get(), native_format));
+    }
+    _current_split_reader = _native_reader.get();
+    return Status::OK();
+}
+
+Status HudiHybridReader::_init_child_reader(format::TableReader* reader,
+                                            format::FileFormat file_format) {
+    DORIS_CHECK(reader != nullptr);
+    VExprContextSPtrs conjuncts;
+    RETURN_IF_ERROR(_clone_conjuncts(&conjuncts)); // the child gets its own conjunct contexts
+    return reader->init({ // mirrors this reader's state, except conjuncts and format
+            .projected_columns = _projected_columns,
+            .column_predicates = _table_column_predicates,
+            .conjuncts = std::move(conjuncts),
+            .format = file_format, // chosen by the caller for this child
+            .scan_params = _scan_params,
+            .io_ctx = _io_ctx,
+            .runtime_state = _runtime_state,
+            .scanner_profile = _scanner_profile,
+            .push_down_agg_type = _push_down_agg_type,
+            .condition_cache_digest = _condition_cache_digest,
+    });
+}
+
+Status HudiHybridReader::_clone_conjuncts(VExprContextSPtrs* conjuncts) const {
+    DORIS_CHECK(conjuncts != nullptr);
+    conjuncts->clear();
+    conjuncts->reserve(_conjuncts.size());
+    for (const auto& source : _conjuncts) { // one freshly cloned context per source conjunct
+        VExprSPtr cloned_root;
+        RETURN_IF_ERROR(format::clone_table_expr_tree(source->root(), &cloned_root));
+        conjuncts->emplace_back(VExprContext::create_shared(std::move(cloned_root)));
+    }
+    return Status::OK();
+}
+
+TFileFormatType::type HudiHybridReader::_range_format_type(const TFileScanRangeParams& params,
+                                                           const TFileRangeDesc& range) {
+    return range.__isset.format_type ? range.format_type : params.format_type; // range wins
+}
+
+bool HudiHybridReader::_is_jni_split(const TFileScanRangeParams& params,
+                                     const TFileRangeDesc& range) {
+    return _range_format_type(params, range) == TFileFormatType::FORMAT_JNI; // else native
+}
+
+// Maps the split's thrift format to the native FileFormat; only Parquet and ORC are accepted.
+Status HudiHybridReader::_to_file_format(const TFileScanRangeParams& params,
+                                         const TFileRangeDesc& range,
+                                         format::FileFormat* file_format) {
+    DORIS_CHECK(file_format != nullptr);
+    const auto thrift_format = _range_format_type(params, range);
+    if (thrift_format == TFileFormatType::FORMAT_PARQUET) {
+        *file_format = format::FileFormat::PARQUET;
+    } else if (thrift_format == TFileFormatType::FORMAT_ORC) {
+        *file_format = format::FileFormat::ORC;
+    } else {
+        // `*file_format` is left untouched for any other format.
+        return Status::NotSupported("Unsupported native Hudi file format {}",
+                                    to_string(thrift_format));
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::hudi
diff --git a/be/src/format_v2/table/hudi_reader.h b/be/src/format_v2/table/hudi_reader.h
new file mode 100644
index 00000000000000..aeaaedf6ab6064
--- /dev/null
+++ b/be/src/format_v2/table/hudi_reader.h
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "format_v2/table_reader.h"
+
+namespace doris::format::hudi {
+
+class HudiReader final : public format::TableReader {
+public:
+    ENABLE_FACTORY_CREATOR(HudiReader);
+    ~HudiReader() final = default;
+
+    Status prepare_split(const format::SplitReadOptions& options) override;
+
+#ifdef BE_TEST // test-only hooks that expose the protected overrides below
+    void TEST_set_scan_params(TFileScanRangeParams* params) { _scan_params = params; }
+    format::TableColumnMappingMode TEST_mapping_mode() const { return mapping_mode(); }
+    Status TEST_annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
+        return annotate_file_schema(file_schema);
+    }
+#endif
+
+protected:
+    format::TableColumnMappingMode mapping_mode() const override; // BY_FIELD_ID or BY_NAME
+    Status annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) override;
+
+private:
+    int64_t _split_schema_id = -1; // schema id from the current split's Hudi params, -1 if unset
+};
+
+// Hudi MOR scans can contain both JNI splits that need log-file merge semantics and native
+// data-file splits without delta logs in the same SplitSource. FileScannerV2 owns one table reader
+// for the scanner lifetime, so this reader keeps native and JNI child readers internally and
+// dispatches each split to the matching child reader.
+class HudiHybridReader final : public format::TableReader {
+public:
+    ~HudiHybridReader() override = default;
+
+    Status init(format::TableReadOptions&& options) override; // children are not created here
+    Status prepare_split(const format::SplitReadOptions& options) override;
+    Status get_block(Block* block, bool* eos) override;
+    Status close() override; // closes both children and reports the first error
+
+private:
+    Status _ensure_current_split_reader(const format::SplitReadOptions& options);
+    Status _init_child_reader(format::TableReader* reader, format::FileFormat file_format);
+    Status _clone_conjuncts(VExprContextSPtrs* conjuncts) const;
+    static TFileFormatType::type _range_format_type(const TFileScanRangeParams& params,
+                                                    const TFileRangeDesc& range);
+    static bool _is_jni_split(const TFileScanRangeParams& params, const TFileRangeDesc& range);
+    static Status _to_file_format(const TFileScanRangeParams& params, const TFileRangeDesc& range,
+                                  format::FileFormat* file_format);
+
+    std::unique_ptr<format::TableReader> _native_reader; // lazy; handles native parquet/orc splits
+    std::unique_ptr<format::TableReader> _jni_reader;    // lazy; handles MOR JNI splits
+    format::TableReader* _current_split_reader = nullptr; // non-owning; one of the two children
+};
+
+} // namespace doris::format::hudi
diff --git a/be/src/format_v2/table/iceberg_reader.cpp b/be/src/format_v2/table/iceberg_reader.cpp
new file mode 100644
index 00000000000000..ccc100f05044cc
--- /dev/null
+++ b/be/src/format_v2/table/iceberg_reader.cpp
@@ -0,0 +1,797 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/iceberg_reader.h"
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+#include <sstream>
+#include <utility>
+
+#include "common/cast_set.h"
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_const.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/define_primitive_type.h"
+#include "core/field.h"
+#include "exprs/vslot_ref.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format_v2/expr/cast.h"
+#include "format_v2/expr/equality_delete_predicate.h"
+#include "format_v2/parquet/parquet_reader.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/table_reader.h"
+#include "io/file_factory.h"
+
+namespace doris::format::iceberg {
+
+static constexpr const char* ROW_LINEAGE_ROW_ID = "_row_id"; // Iceberg row-lineage column name
+static constexpr int32_t ROW_LINEAGE_ROW_ID_FIELD_ID = 2147483540; // == INT32_MAX - 107
+
+// Renders `values` as "[v0, v1, ...]" using operator<< of T; an empty vector yields "[]".
+template <typename T>
+static std::string join_values_for_debug(const std::vector<T>& values) {
+    std::ostringstream rendered;
+    const char* separator = "";
+    rendered << "[";
+    for (const auto& value : values) {
+        rendered << separator << value;
+        separator = ", ";
+    }
+    rendered << "]";
+    return rendered.str();
+}
+
+static bool is_projected_row_lineage_row_id(const format::ColumnDefinition& column) {
+    // The name check exists because customize_file_scan_request() is also exercised directly by
+    // scan-request tests before any mapper has bound `_row_id` by field id.
+    if (column.name == ROW_LINEAGE_ROW_ID) {
+        return true;
+    }
+    return column.has_identifier_field_id() &&
+           column.get_identifier_field_id() == ROW_LINEAGE_ROW_ID_FIELD_ID;
+}
+
+static bool is_projected_iceberg_rowid(const format::ColumnDefinition& column) {
+    return column.name == BeConsts::ICEBERG_ROWID_COL; // recognized by column name only
+}
+
+// Debug rendering of one delete-file descriptor. Unset thrift fields are printed as a
+// placeholder ("null", -1 or "[]") instead of their in-memory default.
+static std::string iceberg_delete_file_debug_string(const TIcebergDeleteFileDesc& delete_file) {
+    const auto& isset = delete_file.__isset;
+    std::ostringstream out;
+    out << "TIcebergDeleteFileDesc{path=" << (isset.path ? delete_file.path : "null");
+    out << ", content=" << (isset.content ? delete_file.content : -1) << ", file_format="
+        << (isset.file_format ? static_cast<int>(delete_file.file_format) : -1);
+    out << ", position_lower_bound="
+        << (isset.position_lower_bound ? delete_file.position_lower_bound : -1);
+    out << ", position_upper_bound="
+        << (isset.position_upper_bound ? delete_file.position_upper_bound : -1);
+    out << ", field_ids="
+        << (isset.field_ids ? join_values_for_debug(delete_file.field_ids) : "[]");
+    out << ", content_offset=" << (isset.content_offset ? delete_file.content_offset : -1);
+    out << ", content_size_in_bytes="
+        << (isset.content_size_in_bytes ? delete_file.content_size_in_bytes : -1) << "}";
+    return out.str();
+}
+
+// Renders the delete files as a bracketed, comma-separated list of per-file debug strings.
+static std::string iceberg_delete_files_debug_string(
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    std::string rendered = "[";
+    const char* separator = "";
+    for (const auto& delete_file : delete_files) {
+        rendered += separator;
+        rendered += iceberg_delete_file_debug_string(delete_file);
+        separator = ", ";
+    }
+    rendered += "]";
+    return rendered;
+}
+
+// Debug rendering of the optional per-split Iceberg descriptor; yields "null" when absent.
+// Unset thrift fields are replaced by a placeholder rather than printed from their in-memory
+// defaults.
+static std::string iceberg_params_debug_string(const std::optional<TIcebergFileDesc>& params) {
+    if (!params.has_value()) {
+        return "null";
+    }
+    const TIcebergFileDesc& desc = *params;
+    const auto& isset = desc.__isset;
+    std::ostringstream out;
+    out << "TIcebergFileDesc{format_version="
+        << (isset.format_version ? desc.format_version : -1);
+    out << ", content=" << (isset.content ? desc.content : -1);
+    out << ", original_file_path="
+        << (isset.original_file_path ? desc.original_file_path : "null");
+    out << ", row_count=" << (isset.row_count ? desc.row_count : -1);
+    // An unset partition_spec_id prints as 0, unlike the -1 used by the other numeric fields.
+    out << ", partition_spec_id=" << (isset.partition_spec_id ? desc.partition_spec_id : 0);
+    // partition_data_json and serialized_split are reported by presence only.
+    out << ", has_partition_data_json=" << isset.partition_data_json;
+    out << ", first_row_id=" << (isset.first_row_id ? desc.first_row_id : -1);
+    out << ", last_updated_sequence_number="
+        << (isset.last_updated_sequence_number ? desc.last_updated_sequence_number : -1);
+    // Both the count and the per-file rendering are derived from delete_files.
+    out << ", delete_file_count=" << (isset.delete_files ? desc.delete_files.size() : 0);
+    out << ", delete_files="
+        << (isset.delete_files ? iceberg_delete_files_debug_string(desc.delete_files) : "[]");
+    out << ", has_serialized_split=" << isset.serialized_split << "}";
+    return out.str();
+}
+
+IcebergTableReader::PositionDeleteRowsCollector::PositionDeleteRowsCollector(
+        std::string data_file_path, format::DeleteRows* rows)
+        : _data_file_path(std::move(data_file_path)), _rows(rows) {} // rows: filled by collect()
+
+Status IcebergTableReader::PositionDeleteRowsCollector::collect(const Block& block,
+                                                                size_t read_rows) {
+    if (read_rows == 0) {
+        return Status::OK();
+    }
+    const auto& file_path_column = assert_cast<const ColumnString&>(
+            *remove_nullable((block.get_by_position(ICEBERG_FILE_PATH_BLOCK_POSITION).column)));
+    const auto& pos_column = assert_cast<const ColumnInt64&>(
+            *remove_nullable(block.get_by_position(ICEBERG_ROW_POS_BLOCK_POSITION).column));
+    for (size_t row = 0; row < read_rows; ++row) { // paths are compared in place, no per-row copy
+        const auto path = file_path_column.get_data_at(row);
+        if (_data_file_path.compare(0, std::string::npos, path.data, path.size) == 0) {
+            _rows->push_back(pos_column.get_element(row)); // position belongs to our data file
+        }
+    }
+    return Status::OK();
+}
+
+Status IcebergTableReader::prepare_split(const format::SplitReadOptions& options) {
+    // Drop all per-split state left over from the previous split.
+    _row_lineage_columns = {};
+    _iceberg_params.reset();
+    _delete_predicates_initialized = false;
+    _position_delete_rows_storage.clear();
+    _equality_delete_filters.clear();
+    const auto& range = options.current_range;
+    if (range.__isset.table_format_params && range.table_format_params.__isset.iceberg_params) {
+        const auto& split_params = range.table_format_params.iceberg_params;
+        _iceberg_params = split_params;
+        if (split_params.__isset.first_row_id) {
+            _row_lineage_columns.first_row_id = split_params.first_row_id;
+        }
+        if (split_params.__isset.last_updated_sequence_number) {
+            _row_lineage_columns.last_updated_sequence_number =
+                    split_params.last_updated_sequence_number;
+        }
+    }
+    RETURN_IF_ERROR(TableReader::prepare_split(options));
+    if (!_is_table_level_count_active()) { // delete predicates are skipped for table-level count
+        RETURN_IF_ERROR(_init_delete_predicates(range.table_format_params));
+    }
+    return Status::OK();
+}
+
+// Renders a diagnostic summary: delete-file counts per content kind, each equality delete
+// filter (field ids, key type names, delete block shape) and the reader's per-split state.
+std::string IcebergTableReader::debug_string() const {
+    size_t position_files = 0;
+    size_t equality_files = 0;
+    size_t deletion_vector_files = 0;
+    if (_iceberg_params.has_value() && _iceberg_params->__isset.delete_files) {
+        for (const auto& file : _iceberg_params->delete_files) {
+            const bool has_kind = file.__isset.content;
+            if (has_kind && file.content == POSITION_DELETE) {
+                ++position_files;
+            } else if (has_kind && file.content == EQUALITY_DELETE) {
+                ++equality_files;
+            } else if (has_kind && file.content == DELETION_VECTOR) {
+                ++deletion_vector_files;
+            }
+        }
+    }
+
+    std::ostringstream filters;
+    filters << "[";
+    const char* filter_separator = "";
+    for (const auto& filter : _equality_delete_filters) {
+        filters << filter_separator << "EqualityDeleteFilter{field_ids="
+                << join_values_for_debug(filter.field_ids) << ", key_types=[";
+        filter_separator = ", ";
+        const char* type_separator = "";
+        for (const auto& key_type : filter.key_types) {
+            filters << type_separator;
+            type_separator = ", ";
+            if (key_type == nullptr) {
+                filters << "null";
+            } else {
+                filters << key_type->get_name();
+            }
+        }
+        filters << "], delete_block_rows=" << filter.delete_block.rows()
+                << ", delete_block_columns=" << filter.delete_block.columns() << "}";
+    }
+    filters << "]";
+
+    std::ostringstream summary;
+    summary << "IcebergTableReader{base=" << format::TableReader::debug_string()
+            << ", iceberg_params=" << iceberg_params_debug_string(_iceberg_params)
+            << ", row_lineage_first_row_id=" << _row_lineage_columns.first_row_id
+            << ", row_lineage_last_updated_sequence_number="
+            << _row_lineage_columns.last_updated_sequence_number
+            << ", need_row_lineage_row_id=" << _need_row_lineage_row_id()
+            << ", need_iceberg_rowid=" << _need_iceberg_rowid()
+            << ", row_position_block_position=" << _row_position_block_position
+            << ", delete_predicates_initialized=" << _delete_predicates_initialized
+            << ", position_delete_file_count=" << position_files
+            << ", equality_delete_file_count=" << equality_files
+            << ", deletion_vector_file_count=" << deletion_vector_files
+            << ", position_delete_rows_storage_count=" << _position_delete_rows_storage.size()
+            << ", equality_delete_filter_count=" << _equality_delete_filters.size()
+            << ", equality_delete_filters=" << filters.str() << "}";
+    return summary.str();
+}
+
+// Materializes every Iceberg virtual column listed in the column mapping; INVALID is skipped.
+Status IcebergTableReader::materialize_virtual_columns(Block* table_block) {
+    const auto& mappings = _data_reader.column_mapper->mappings();
+    for (size_t idx = 0; idx < mappings.size(); ++idx) {
+        switch (mappings[idx].virtual_column_type) {
+        case format::TableVirtualColumnType::ROW_ID:
+            RETURN_IF_ERROR(_materialize_row_lineage_row_id(table_block, idx));
+            break;
+        case format::TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER:
+            RETURN_IF_ERROR(
+                    _materialize_row_lineage_last_updated_sequence_number(table_block, idx));
+            break;
+        case format::TableVirtualColumnType::ICEBERG_ROWID:
+            RETURN_IF_ERROR(_materialize_iceberg_rowid(table_block, idx));
+            break;
+        case format::TableVirtualColumnType::INVALID:
+            break;
+        }
+    }
+    return Status::OK();
+}
+
+// Adds the row-position column when row ids are needed, plus the equality delete predicates.
+Status IcebergTableReader::customize_file_scan_request(format::FileScanRequest* file_request) {
+    RETURN_IF_ERROR(TableReader::customize_file_scan_request(file_request));
+    const bool lineage_ready = _row_lineage_columns.first_row_id >= 0;
+    if ((lineage_ready && _need_row_lineage_row_id()) || _need_iceberg_rowid()) {
+        RETURN_IF_ERROR(_append_row_position_output_column(file_request));
+    }
+    return _append_equality_delete_predicates(file_request);
+}
+
+// Aggregate pushdown is supported only when the base TableReader supports it for `agg_type`
+// and no equality delete filter has been loaded; the base check is evaluated first and
+// short-circuits the second condition.
+bool IcebergTableReader::_supports_aggregate_pushdown(TPushAggOp::type agg_type) const {
+    return TableReader::_supports_aggregate_pushdown(agg_type) && _equality_delete_filters.empty();
+}
+
+// Looks for the deletion vector among the split's Iceberg delete files and, if one is present,
+// fills `desc` with its location. `*has_delete_file` reports whether one was found. More than
+// one deletion vector, or one without content offset/size, is reported as an error.
+Status IcebergTableReader::_parse_deletion_vector_file(const TTableFormatFileDesc& t_desc,
+                                                       DeleteFileDesc* desc,
+                                                       bool* has_delete_file) {
+    DORIS_CHECK(desc != nullptr);
+    DORIS_CHECK(has_delete_file != nullptr);
+    *has_delete_file = false;
+    const auto& params = t_desc.iceberg_params;
+    if (!t_desc.__isset.iceberg_params || !params.__isset.format_version ||
+        params.format_version < MIN_SUPPORT_DELETE_FILES_VERSION ||
+        !params.__isset.delete_files || params.delete_files.empty()) {
+        return Status::OK();
+    }
+
+    // At most one deletion vector is accepted per data file.
+    const TIcebergDeleteFileDesc* dv = nullptr;
+    for (const auto& candidate : params.delete_files) {
+        if (!candidate.__isset.content || candidate.content != DELETION_VECTOR) {
+            continue;
+        }
+        if (dv != nullptr) {
+            return Status::DataQualityError("This iceberg data file has multiple DVs.");
+        }
+        dv = &candidate;
+    }
+    if (dv == nullptr) {
+        return Status::OK();
+    }
+    if (!dv->__isset.content_offset || !dv->__isset.content_size_in_bytes) {
+        return Status::InternalError("Deletion vector is missing content offset or length");
+    }
+
+    desc->key = _iceberg_delete_vector_cache_key(*dv);
+    desc->path = dv->path;
+    desc->start_offset = dv->content_offset;
+    desc->size = dv->content_size_in_bytes;
+    desc->file_size = -1;
+    desc->format = DeleteFileDesc::Format::ICEBERG;
+    *has_delete_file = true;
+    return Status::OK();
+}
+
+// Builds the split's delete state once. Delete files are separated into position and equality
+// deletes, rows of an already-parsed deletion vector are copied into member storage, and then
+// both kinds of delete file are loaded. The initialized flag is set on every successful return.
+Status IcebergTableReader::_init_delete_predicates(const TTableFormatFileDesc& t_desc) {
+    const auto& params = t_desc.iceberg_params;
+    const bool nothing_to_load =
+            !t_desc.__isset.iceberg_params || _delete_predicates_initialized ||
+            !params.__isset.format_version ||
+            params.format_version < MIN_SUPPORT_DELETE_FILES_VERSION ||
+            !params.__isset.delete_files || params.delete_files.empty();
+    if (nothing_to_load) {
+        _delete_predicates_initialized = true;
+        return Status::OK();
+    }
+
+    std::vector<TIcebergDeleteFileDesc> position_files;
+    std::vector<TIcebergDeleteFileDesc> equality_files;
+    for (const auto& file : params.delete_files) {
+        if (!file.__isset.content) {
+            continue;
+        }
+        if (file.content == POSITION_DELETE) {
+            position_files.push_back(file);
+        } else if (file.content == EQUALITY_DELETE) {
+            equality_files.push_back(file);
+        }
+    }
+    // A non-null `_delete_rows` means a deletion vector was parsed earlier. Copy its rows into
+    // member storage so position delete rows can later be merged into the same vector.
+    if (_delete_rows != nullptr) {
+        _position_delete_rows_storage = *_delete_rows;
+        _delete_rows = &_position_delete_rows_storage;
+    }
+    // Position delete files yield row positions that are merged into `_delete_rows`; equality
+    // delete files yield key values that are turned into predicates for later filtering.
+    if (!position_files.empty()) {
+        RETURN_IF_ERROR(_init_position_delete_rows(position_files));
+    }
+    if (!equality_files.empty()) {
+        RETURN_IF_ERROR(_init_equality_delete_predicates(equality_files));
+    }
+
+    _delete_predicates_initialized = true;
+    return Status::OK();
+}
+
+// Cache key layout: "iceberg_dv:" + file path + raw bytes of content_offset + raw bytes of
+// content_size_in_bytes.
+std::string IcebergTableReader::_iceberg_delete_vector_cache_key(
+        const TIcebergDeleteFileDesc& delete_file) {
+    const std::string prefix = "iceberg_dv:";
+    const auto& offset = delete_file.content_offset;
+    const auto& length = delete_file.content_size_in_bytes;
+    std::string key;
+    key.reserve(prefix.size() + delete_file.path.size() + sizeof(offset) + sizeof(length));
+    key.append(prefix);
+    key.append(delete_file.path);
+    // The numeric fields are appended as their in-memory bytes, not as decimal text.
+    key.append(reinterpret_cast<const char*>(&offset), sizeof(offset));
+    key.append(reinterpret_cast<const char*>(&length), sizeof(length));
+    return key;
+}
+
+// Copies the scan params' file-system settings; the file type defaults to FILE_LOCAL.
+std::shared_ptr<io::FileSystemProperties> IcebergTableReader::_delete_file_system_properties(
+        const TFileScanRangeParams& scan_params) {
+    auto fs = std::make_shared<io::FileSystemProperties>();
+    fs->system_type = scan_params.__isset.file_type ? scan_params.file_type : TFileType::FILE_LOCAL;
+    fs->properties = scan_params.properties;
+    fs->hdfs_params = scan_params.hdfs_params;
+    if (scan_params.__isset.broker_addresses) {
+        const auto& brokers = scan_params.broker_addresses;
+        fs->broker_addresses.assign(brokers.begin(), brokers.end());
+    }
+    return fs;
+}
+
+std::unique_ptr<io::FileDescription> IcebergTableReader::_delete_file_description(
+        const TFileRangeDesc& range) {  // Converts a thrift range into an io::FileDescription.
+    auto file_description = std::make_unique<io::FileDescription>();
+    file_description->path = range.path;
+    file_description->file_size = range.__isset.file_size ? range.file_size : -1;  // -1 if unset
+    file_description->range_start_offset = range.__isset.start_offset ? range.start_offset : 0;
+    file_description->range_size = range.__isset.size ? range.size : -1;  // -1 if unset
+    if (range.__isset.fs_name) {  // fs_name is only copied when the range provides one
+        file_description->fs_name = range.fs_name;
+    }
+    return file_description;
+}
+
+std::string IcebergTableReader::_data_file_path() const {  // Returns the data file path.
+    if (_iceberg_params.has_value() && _iceberg_params->__isset.original_file_path) {
+        return _iceberg_params->original_file_path;  // preferred when the params carry it
+    }
+    DORIS_CHECK(_current_task != nullptr);  // fallback needs a current task with a data file
+    DORIS_CHECK(_current_task->data_file != nullptr);
+    return _current_task->data_file->path;
+}
+
+Status IcebergTableReader::_append_row_position_output_column(format::FileScanRequest* request) {
+    const auto row_position_column_id = format::LocalColumnId(format::ROW_POSITION_COLUMN_ID);
+    _append_file_scan_column(request, row_position_column_id, &request->non_predicate_columns);
+    _row_position_block_position = request->local_positions.at(row_position_column_id).value();
+    return Status::OK();  // the stored position is used later to materialize row ids
+}
+
+// Adds one EqualityDeletePredicate conjunct per equality delete filter. Each key column is found
+// in the data file schema by field id and wrapped in a cast when its type differs from the key.
+Status IcebergTableReader::_append_equality_delete_predicates(format::FileScanRequest* request) {
+    DORIS_CHECK(request != nullptr);
+    for (const auto& filter : _equality_delete_filters) {
+        auto predicate =
+                std::make_shared<EqualityDeletePredicate>(filter.delete_block, filter.field_ids);
+        DCHECK_EQ(filter.field_ids.size(), filter.key_types.size());
+        for (size_t key = 0; key < filter.field_ids.size(); ++key) {
+            const int field_id = filter.field_ids[key];
+            auto col = std::ranges::find_if(
+                    _data_reader.file_schema, [field_id](const format::ColumnDefinition& field) {
+                        return field.has_identifier_field_id() &&
+                               field.get_identifier_field_id() == field_id;
+                    });
+            if (col == _data_reader.file_schema.end()) {
+                return Status::InternalError(
+                        "Can not find equality delete column field id {} in data file schema",
+                        field_id);
+            }
+            const auto& key_type = filter.key_types[key];
+            const auto column_id = format::LocalColumnId(col->file_local_id());
+            _append_file_scan_column(request, column_id, &request->predicate_columns);
+            const int slot_pos = cast_set<int>(request->local_positions.at(column_id).value());
+            auto slot = VSlotRef::create_shared(slot_pos, slot_pos, -1, col->type, col->name);
+            if (col->type->equals(*key_type)) {
+                predicate->add_child(std::move(slot));
+            } else {
+                auto cast_expr = Cast::create_shared(key_type);
+                cast_expr->add_child(std::move(slot));
+                predicate->add_child(std::move(cast_expr));
+            }
+        }
+        request->delete_conjuncts.push_back(VExprContext::create_shared(std::move(predicate)));
+    }
+    return Status::OK();
+}
+
+// Reads one Iceberg position delete file and hands every batch to `collector`.
+// Only Parquet is accepted: a missing format, ORC, and any other format are each rejected with
+// their own status. The file must expose both the file-path and the row-position column;
+// they are read into block positions ICEBERG_FILE_PATH_BLOCK_POSITION / ICEBERG_ROW_POS_BLOCK_POSITION.
+Status IcebergTableReader::_read_parquet_position_delete_file(
+        const TIcebergDeleteFileDesc& delete_file, const TFileScanRangeParams& scan_params,
+        IcebergDeleteFileIOContext* delete_io_ctx, PositionDeleteRowsCollector* collector) {
+    if (!delete_file.__isset.file_format) {
+        return Status::InternalError("Iceberg position delete file is missing file format");
+    }
+    if (delete_file.file_format == TFileFormatType::FORMAT_ORC) {
+        return Status::NotSupported("Iceberg ORC position delete file is not supported");
+    }
+    if (delete_file.file_format != TFileFormatType::FORMAT_PARQUET) {
+        return Status::NotSupported("Unsupported Iceberg delete file format {}",
+                                    delete_file.file_format);
+    }
+
+    // Reuse the current data file's fs_name, when it has one, for the delete file range.
+    auto delete_range = build_iceberg_delete_file_range(delete_file.path);
+    const bool has_data_file = _current_task != nullptr && _current_task->data_file != nullptr;
+    if (has_data_file && !_current_task->data_file->fs_name.empty()) {
+        delete_range.__set_fs_name(_current_task->data_file->fs_name);
+    }
+    auto system_properties = _delete_file_system_properties(scan_params);
+    auto file_description = _delete_file_description(delete_range);
+    // Non-owning handle: the deleter is a no-op, so `delete_io_ctx` keeps the IOContext.
+    std::shared_ptr<io::IOContext> io_ctx(&delete_io_ctx->io_ctx, [](io::IOContext*) {});
+    format::parquet::ParquetReader reader(system_properties, file_description, io_ctx,
+                                          _scanner_profile);
+    RETURN_IF_ERROR(reader.init(_runtime_state));
+
+    std::vector<format::ColumnDefinition> schema;
+    RETURN_IF_ERROR(reader.get_schema(&schema));
+    format::ColumnDefinition* path_field = nullptr;
+    format::ColumnDefinition* pos_field = nullptr;
+    for (auto& field : schema) {
+        if (field.name == ICEBERG_FILE_PATH) {
+            path_field = &field;
+        } else if (field.name == ICEBERG_ROW_POS) {
+            pos_field = &field;
+        }
+    }
+    if (path_field == nullptr || pos_field == nullptr) {
+        return Status::InternalError("Position delete parquet file is missing required columns");
+    }
+
+    // Request exactly the two required columns, each mapped to its fixed block position.
+    const auto path_id = format::LocalColumnId(path_field->file_local_id());
+    const auto pos_id = format::LocalColumnId(pos_field->file_local_id());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {format::LocalColumnIndex::top_level(path_id),
+                                      format::LocalColumnIndex::top_level(pos_id)};
+    request->local_positions = {
+            {path_id, format::LocalIndex(ICEBERG_FILE_PATH_BLOCK_POSITION)},
+            {pos_id, format::LocalIndex(ICEBERG_ROW_POS_BLOCK_POSITION)},
+    };
+    RETURN_IF_ERROR(reader.open(request));
+
+    bool eof = false;
+    while (!eof) {
+        // A fresh two-column block (file path, row position) is built for every batch.
+        Block block;
+        block.insert({path_field->type->create_column(), path_field->type, ICEBERG_FILE_PATH});
+        block.insert({pos_field->type->create_column(), pos_field->type, ICEBERG_ROW_POS});
+        size_t read_rows = 0;
+        RETURN_IF_ERROR(reader.get_block(&block, &read_rows, &eof));
+        RETURN_IF_ERROR(collector->collect(block, read_rows));
+    }
+    return reader.close();
+}
+
+// Collects the row positions that the given position delete files delete from the current data
+// file and merges them, sorted and de-duplicated, into `_position_delete_rows_storage`.
+// `_delete_rows` is left untouched when the delete files contribute no rows.
+Status IcebergTableReader::_init_position_delete_rows(
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    TFileScanRangeParams delete_scan_params =
+            _scan_params == nullptr ? TFileScanRangeParams() : *_scan_params;
+    format::DeleteRows collected_rows;
+    IcebergDeleteFileIOContext delete_io_ctx(_runtime_state);
+    PositionDeleteRowsCollector collector(_data_file_path(), &collected_rows);
+    for (const auto& delete_file : delete_files) {
+        RETURN_IF_ERROR(_read_parquet_position_delete_file(delete_file, delete_scan_params,
+                                                           &delete_io_ctx, &collector));
+    }
+    if (!collected_rows.empty()) {
+        // Position delete files and deletion vectors both become row-position deletes for the
+        // common TableReader DeletePredicate path. DeletePredicate stores a reference to the
+        // vector behind `_delete_rows`, so the merged rows must live in the member vector.
+        auto& merged = _position_delete_rows_storage;
+        merged.insert(merged.end(), collected_rows.begin(), collected_rows.end());
+        std::sort(merged.begin(), merged.end());
+        merged.erase(std::unique(merged.begin(), merged.end()), merged.end());
+        _delete_rows = &merged;
+    }
+    return Status::OK();
+}
+
+// Reads every given equality delete file, sharing one delete-file IO context; the first read
+// error is returned immediately. Default scan params are used when `_scan_params` is null.
+Status IcebergTableReader::_init_equality_delete_predicates(
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    TFileScanRangeParams params = _scan_params == nullptr ? TFileScanRangeParams() : *_scan_params;
+    IcebergDeleteFileIOContext delete_io_ctx(_runtime_state);
+    for (const auto& delete_file : delete_files) {
+        RETURN_IF_ERROR(_read_parquet_equality_delete_file(delete_file, params, &delete_io_ctx));
+    }
+    return Status::OK();
+}
+
+// Reads one Parquet equality delete file, merges all of its batches into a single block of key
+// columns, and appends the resulting EqualityDeleteFilter to `_equality_delete_filters`.
+Status IcebergTableReader::_read_parquet_equality_delete_file(
+        const TIcebergDeleteFileDesc& delete_file, const TFileScanRangeParams& scan_params,
+        IcebergDeleteFileIOContext* delete_io_ctx) {
+    if (!delete_file.__isset.file_format) {
+        return Status::InternalError("Iceberg equality delete file is missing file format");
+    }
+    if (delete_file.file_format != TFileFormatType::FORMAT_PARQUET) {
+        return Status::NotSupported("Unsupported Iceberg equality delete file format {}",
+                                    delete_file.file_format);
+    }
+    if (!delete_file.__isset.field_ids || delete_file.field_ids.empty()) {
+        return Status::InternalError("Iceberg equality delete file is missing field ids");
+    }
+
+    auto delete_range = build_iceberg_delete_file_range(delete_file.path);
+    if (_current_task != nullptr && _current_task->data_file != nullptr &&
+        !_current_task->data_file->fs_name.empty()) {
+        delete_range.__set_fs_name(_current_task->data_file->fs_name);
+    }
+    auto system_properties = _delete_file_system_properties(scan_params);
+    auto file_description = _delete_file_description(delete_range);
+    std::shared_ptr<io::IOContext> io_ctx(&delete_io_ctx->io_ctx, [](io::IOContext*) {});
+    format::parquet::ParquetReader reader(system_properties, file_description, io_ctx,
+                                          _scanner_profile);
+    RETURN_IF_ERROR(reader.init(_runtime_state));
+
+    std::vector<format::ColumnDefinition> schema;
+    RETURN_IF_ERROR(reader.get_schema(&schema));
+    std::vector<format::ColumnDefinition> delete_fields;
+    std::vector<int> delete_field_ids;
+    std::vector<DataTypePtr> delete_key_types;
+    for (const auto field_id : delete_file.field_ids) {
+        auto field_it = std::find_if(schema.begin(), schema.end(),
+                                     [field_id](const format::ColumnDefinition& field) {
+                                         return field.has_identifier_field_id() &&
+                                                field_id == field.get_identifier_field_id();
+                                     });
+        if (field_it == schema.end()) {
+            return Status::InternalError("Can not find field id {} in equality delete file {}",
+                                         field_id, delete_file.path);
+        }
+        if (!field_it->children.empty()) {
+            return Status::NotSupported(
+                    "Iceberg equality delete does not support complex column {}", field_it->name);
+        }
+        delete_fields.push_back(*field_it);
+        delete_field_ids.push_back(field_id);
+        delete_key_types.push_back(field_it->type);
+    }
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    for (size_t idx = 0; idx < delete_fields.size(); ++idx) {
+        const auto column_id = format::LocalColumnId(delete_fields[idx].file_local_id());
+        request->non_predicate_columns.push_back(format::LocalColumnIndex::top_level(column_id));
+        request->local_positions.emplace(column_id, format::LocalIndex(idx));
+    }
+    RETURN_IF_ERROR(reader.open(request));
+
+    // Takes the fields by const reference so the per-batch call does not copy the definitions.
+    auto build_equality_delete_block =
+            [](const std::vector<format::ColumnDefinition>& fields) -> Block {
+        Block block;
+        for (const auto& field : fields) {
+            block.insert({field.type->create_column(), field.type, field.name});
+        }
+        return block;
+    };
+    MutableBlock mutable_delete_block(build_equality_delete_block(delete_fields));
+    bool eof = false;
+    while (!eof) {
+        Block block = build_equality_delete_block(delete_fields);
+        size_t read_rows = 0;
+        RETURN_IF_ERROR(reader.get_block(&block, &read_rows, &eof));
+        if (read_rows > 0) {
+            RETURN_IF_ERROR(mutable_delete_block.merge(block));
+        }
+    }
+    RETURN_IF_ERROR(reader.close());
+    _equality_delete_filters.push_back(
+            EqualityDeleteFilter {.field_ids = std::move(delete_field_ids),
+                                  .key_types = std::move(delete_key_types),
+                                  .delete_block = mutable_delete_block.to_block()});
+    return Status::OK();
+}
+
+Status IcebergTableReader::_materialize_row_lineage_row_id(Block* table_block, size_t column_idx) {
+    if (_row_lineage_columns.first_row_id < 0) {
+        return Status::OK();  // negative first_row_id: the column is left as it is
+    }
+    DORIS_CHECK(_row_position_block_position < _data_reader.block_template.columns());
+    const auto& row_position_column = assert_cast<const ColumnInt64&>(
+            *_data_reader.block_template.get_by_position(_row_position_block_position).column);
+    DORIS_CHECK(row_position_column.size() == table_block->rows());
+    auto column = IColumn::mutate(
+            table_block->get_by_position(column_idx).column->convert_to_full_column_if_const());
+    auto* nullable_column = assert_cast<ColumnNullable*>(column.get());  // nullable is required
+    auto& null_map = nullable_column->get_null_map_data();
+    auto& data = assert_cast<ColumnInt64&>(*nullable_column->get_nested_column_ptr()).get_data();
+    DORIS_CHECK(null_map.size() == row_position_column.size());
+    DORIS_CHECK(data.size() == row_position_column.size());
+    for (size_t row = 0; row < row_position_column.size(); ++row) {
+        if (null_map[row]) {  // only rows that are currently NULL are filled in
+            null_map[row] = 0;  // the row becomes non-null: first_row_id + row position
+            data[row] = _row_lineage_columns.first_row_id + row_position_column.get_element(row);
+        }
+    }
+    table_block->replace_by_position(column_idx, std::move(column));
+    return Status::OK();
+}
+
+Status IcebergTableReader::_materialize_iceberg_rowid(Block* table_block, size_t column_idx) {
+    DORIS_CHECK(_row_position_block_position < _data_reader.block_template.columns());
+    const auto& row_position_column = assert_cast<const ColumnInt64&>(
+            *_data_reader.block_template.get_by_position(_row_position_block_position).column);
+    DORIS_CHECK(row_position_column.size() == table_block->rows());
+
+    const auto& type = table_block->get_by_position(column_idx).type;
+    auto column = type->create_column();  // built from scratch; it replaces the old column below
+    auto* nullable_column = check_and_get_column<ColumnNullable>(column.get());
+    auto* struct_column = nullable_column != nullptr
+                                  ? check_and_get_column<ColumnStruct>(
+                                            nullable_column->get_nested_column_ptr().get())
+                                  : check_and_get_column<ColumnStruct>(column.get());
+    DORIS_CHECK(struct_column != nullptr);  // the type must be a struct or a nullable struct
+    DORIS_CHECK(struct_column->tuple_size() >= 4);  // path, row pos, spec id, partition data
+
+    const auto rows = row_position_column.size();
+    const auto file_path = _data_file_path();
+    const int32_t partition_spec_id =
+            _iceberg_params.has_value() && _iceberg_params->__isset.partition_spec_id
+                    ? _iceberg_params->partition_spec_id
+                    : 0;  // 0 when the params do not provide a spec id
+    const std::string partition_data_json =
+            _iceberg_params.has_value() && _iceberg_params->__isset.partition_data_json
+                    ? _iceberg_params->partition_data_json
+                    : "";  // empty string when the params do not provide partition data
+
+    auto& file_path_column = struct_column->get_column(0);
+    auto& row_pos_column = struct_column->get_column(1);
+    auto& spec_id_column = struct_column->get_column(2);
+    auto& partition_data_column = struct_column->get_column(3);
+    file_path_column.reserve(rows);
+    row_pos_column.reserve(rows);
+    spec_id_column.reserve(rows);
+    partition_data_column.reserve(rows);
+    for (size_t row = 0; row < rows; ++row) {  // only the row position differs between rows
+        file_path_column.insert_data(file_path.data(), file_path.size());
+        const int64_t row_pos = row_position_column.get_element(row);
+        row_pos_column.insert_data(reinterpret_cast<const char*>(&row_pos), sizeof(row_pos));
+        spec_id_column.insert_data(reinterpret_cast<const char*>(&partition_spec_id),
+                                   sizeof(partition_spec_id));
+        partition_data_column.insert_data(partition_data_json.data(), partition_data_json.size());
+    }
+    if (nullable_column != nullptr) {
+        nullable_column->get_null_map_data().resize_fill(rows, 0);  // every row is non-null
+    }
+    table_block->replace_by_position(column_idx, std::move(column));
+    return Status::OK();
+}
+
+Status IcebergTableReader::_materialize_row_lineage_last_updated_sequence_number(
+        Block* table_block, size_t column_idx) {
+    if (_row_lineage_columns.last_updated_sequence_number < 0) {
+        return Status::OK();  // negative value: the column is left as it is
+    }
+    auto column = IColumn::mutate(
+            table_block->get_by_position(column_idx).column->convert_to_full_column_if_const());
+    auto* nullable_column = assert_cast<ColumnNullable*>(column.get());  // nullable is required
+    auto& null_map = nullable_column->get_null_map_data();
+    auto& data = assert_cast<ColumnInt64&>(*nullable_column->get_nested_column_ptr()).get_data();
+    DORIS_CHECK(null_map.size() == table_block->rows());
+    DORIS_CHECK(data.size() == table_block->rows());
+    for (size_t row = 0; row < table_block->rows(); ++row) {
+        if (null_map[row]) {  // only rows that are currently NULL are filled in
+            null_map[row] = 0;  // the row becomes non-null with the split-level sequence number
+            data[row] = _row_lineage_columns.last_updated_sequence_number;
+        }
+    }
+    table_block->replace_by_position(column_idx, std::move(column));
+    return Status::OK();
+}
+
+// True when a column mapping has virtual column type ROW_ID, or when a projected column
+// satisfies is_projected_row_lineage_row_id. A null column mapper is treated as no mappings.
+bool IcebergTableReader::_need_row_lineage_row_id() const {
+    const auto is_row_id = [](const auto& mapping) {
+        return mapping.virtual_column_type == format::TableVirtualColumnType::ROW_ID;
+    };
+    return (_data_reader.column_mapper != nullptr &&
+            std::ranges::any_of(_data_reader.column_mapper->mappings(), is_row_id)) ||
+           std::ranges::any_of(_projected_columns, is_projected_row_lineage_row_id);
+}
+
+// True when a column mapping has virtual column type ICEBERG_ROWID, or when a projected column
+// satisfies is_projected_iceberg_rowid. A null column mapper is treated as no mappings.
+bool IcebergTableReader::_need_iceberg_rowid() const {
+    const auto is_iceberg_rowid = [](const auto& mapping) {
+        return mapping.virtual_column_type == format::TableVirtualColumnType::ICEBERG_ROWID;
+    };
+    return (_data_reader.column_mapper != nullptr &&
+            std::ranges::any_of(_data_reader.column_mapper->mappings(), is_iceberg_rowid)) ||
+           std::ranges::any_of(_projected_columns, is_projected_iceberg_rowid);
+}
+
+} // namespace doris::format::iceberg
diff --git a/be/src/format_v2/table/iceberg_reader.h b/be/src/format_v2/table/iceberg_reader.h
new file mode 100644
index 00000000000000..1a2811ef968277
--- /dev/null
+++ b/be/src/format_v2/table/iceberg_reader.h
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "format/table/iceberg_delete_file_reader_helper.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris {
+class Block;
+struct DeleteFileDesc;
+namespace io {
+struct FileDescription;
+struct FileSystemProperties;
+} // namespace io
+} // namespace doris
+
+namespace doris::format::iceberg {
+
+// Iceberg table-level reader.
+// It reuses TableReader for split orchestration, dynamic partition pruning and table-block
+// finalization, while composing a FileReader for physical data-file reads instead of inheriting
+// from a concrete file-format reader.
+class IcebergTableReader : public format::TableReader {
+public:
+    ~IcebergTableReader() override = default;
+    Status init(format::TableReadOptions&& options) override {
+        RETURN_IF_ERROR(format::TableReader::init(std::move(options)));
+        _mapper_options.mode = format::TableColumnMappingMode::BY_FIELD_ID;
+        return Status::OK();
+    }
+
+    Status prepare_split(const format::SplitReadOptions& options) override;
+    std::string debug_string() const override;
+    format::TableColumnMappingMode mapping_mode() const override {
+        return !_data_reader.file_schema.empty() && _has_field_id(_data_reader.file_schema)
+                       ? format::TableColumnMappingMode::BY_FIELD_ID
+                       : format::TableColumnMappingMode::BY_NAME;
+    }
+
+protected:
+    Status materialize_virtual_columns(Block* table_block) override;
+
+    Status customize_file_scan_request(format::FileScanRequest* file_request) override;
+
+    bool _supports_aggregate_pushdown(TPushAggOp::type agg_type) const override;
+
+    Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc,
+                                       bool* has_delete_file) override;
+
+    Status _init_delete_predicates(const TTableFormatFileDesc& t_desc);
+
+private:
+    bool _has_field_id(const std::vector<format::ColumnDefinition>& schema) const {
+        for (const auto& field : schema) {
+            // TopN lazy materialization asks the file reader to synthesize GLOBAL_ROWID in the
+            // first-phase scan. That virtual column is not an Iceberg data field and therefore has
+            // no Iceberg field id. Do not let it downgrade schema-evolution reads to BY_NAME,
+            // otherwise old data files whose physical names predate a rename (for example,
+            // table column `new_new_id` stored as file column `id`) are materialized as defaults.
+            if (field.column_type != format::ColumnType::DATA_COLUMN) {
+                continue;
+            }
+            if (!field.has_identifier_field_id()) {
+                return false;
+            }
+            if (!_has_field_id(field.children)) {
+                return false;
+            }
+        }
+        return true;
+    }
+    static constexpr int MIN_SUPPORT_DELETE_FILES_VERSION = 2;
+    static constexpr int POSITION_DELETE = 1;
+    static constexpr int EQUALITY_DELETE = 2;
+    static constexpr int DELETION_VECTOR = 3;
+
+    struct RowLineageColumns {
+        int64_t first_row_id = -1;
+        int64_t last_updated_sequence_number = -1; // < 0: fill-in is skipped
+    };
+
+    static constexpr const char* ICEBERG_FILE_PATH = "file_path";
+    static constexpr const char* ICEBERG_ROW_POS = "pos";
+    static constexpr size_t ICEBERG_FILE_PATH_BLOCK_POSITION = 0;
+    static constexpr size_t ICEBERG_ROW_POS_BLOCK_POSITION = 1;
+
+    class PositionDeleteRowsCollector final {
+    public:
+        PositionDeleteRowsCollector(std::string data_file_path, format::DeleteRows* rows);
+
+        Status collect(const Block& block, size_t read_rows);
+
+    private:
+        std::string _data_file_path;
+        format::DeleteRows* _rows = nullptr;
+    };
+
+    static std::string _iceberg_delete_vector_cache_key(const TIcebergDeleteFileDesc& delete_file);
+
+    static std::shared_ptr<io::FileSystemProperties> _delete_file_system_properties(
+            const TFileScanRangeParams& scan_params);
+
+    static std::unique_ptr<io::FileDescription> _delete_file_description(
+            const TFileRangeDesc& range);
+
+    std::string _data_file_path() const;
+
+    // Append row position column to file scan request for position delete handling.
+    Status _append_row_position_output_column(format::FileScanRequest* request);
+    // Append equality delete predicates to file scan request based on the delete files in iceberg
+    // params. DeleteVector and position delete files use the common DeleteRows path in TableReader.
+    Status _append_equality_delete_predicates(format::FileScanRequest* request);
+
+    Status _init_equality_delete_predicates(
+            const std::vector<TIcebergDeleteFileDesc>& delete_files);
+
+    // Read equality/position delete files.
+    Status _read_parquet_equality_delete_file(const TIcebergDeleteFileDesc& delete_file,
+                                              const TFileScanRangeParams& scan_params,
+                                              IcebergDeleteFileIOContext* delete_io_ctx);
+    Status _read_parquet_position_delete_file(const TIcebergDeleteFileDesc& delete_file,
+                                              const TFileScanRangeParams& scan_params,
+                                              IcebergDeleteFileIOContext* delete_io_ctx,
+                                              PositionDeleteRowsCollector* collector);
+
+    // Read position delete files and collect deleted row positions to update DeletePredicate.
+    Status _init_position_delete_rows(const std::vector<TIcebergDeleteFileDesc>& delete_files);
+
+    // Materialize Iceberg rowid / row lineage virtual columns at column_idx of table_block.
+    Status _materialize_iceberg_rowid(Block* table_block, size_t column_idx);
+    Status _materialize_row_lineage_row_id(Block* table_block, size_t column_idx);
+    Status _materialize_row_lineage_last_updated_sequence_number(Block* table_block,
+                                                                 size_t column_idx);
+
+    RowLineageColumns _row_lineage_columns;
+    size_t _row_position_block_position = 0;
+    std::optional<TIcebergFileDesc> _iceberg_params;
+    bool _delete_predicates_initialized = false;
+    format::DeleteRows _position_delete_rows_storage;
+    struct EqualityDeleteFilter {
+        std::vector<int> field_ids;
+        std::vector<DataTypePtr> key_types;
+        Block delete_block;
+    };
+    std::vector<EqualityDeleteFilter> _equality_delete_filters;
+
+    bool _need_row_lineage_row_id() const;
+    bool _need_iceberg_rowid() const;
+};
+
+} // namespace doris::format::iceberg
diff --git a/be/src/format_v2/table/paimon_reader.cpp b/be/src/format_v2/table/paimon_reader.cpp
new file mode 100644
index 00000000000000..c82c99dd2854fa
--- /dev/null
+++ b/be/src/format_v2/table/paimon_reader.cpp
@@ -0,0 +1,194 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/paimon_reader.h"
+
+#include <glog/logging.h>
+
+#include <cstring>
+#include <string>
+#include <utility>
+
+#include "exprs/vexpr_context.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/jni/paimon_jni_reader.h"
+#include "format_v2/table/schema_history_util.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format::paimon {
+
+// Records the schema id carried by the incoming split (-1 when the split carries none) and then
+// delegates to the base TableReader.
+Status PaimonReader::prepare_split(const format::SplitReadOptions& options) {
+    const auto& split_params = options.current_range.table_format_params.paimon_params;
+    // Recomputed on every split so a schema id from the previous split cannot leak through.
+    _split_schema_id = split_params.__isset.schema_id ? split_params.schema_id : -1;
+    return format::TableReader::prepare_split(options);
+}
+
+format::TableColumnMappingMode PaimonReader::mapping_mode() const {
+    using Mode = format::TableColumnMappingMode;
+    const bool by_field_id = format::can_map_by_history_schema(_scan_params, _split_schema_id);
+    return by_field_id ? Mode::BY_FIELD_ID : Mode::BY_NAME; // fall back to name matching
+}
+
+Status PaimonReader::annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
+    DORIS_CHECK(file_schema != nullptr);
+    return mapping_mode() == format::TableColumnMappingMode::BY_FIELD_ID
+                   ? format::annotate_file_schema_from_history(_scan_params, _split_schema_id,
+                                                               file_schema)
+                   : Status::OK(); // name-based mapping: schema is left as-is
+}
+
+Status PaimonReader::_parse_deletion_vector_file(const TTableFormatFileDesc& t_desc,
+                                                 DeleteFileDesc* desc, bool* has_delete_file) {
+    DORIS_CHECK(desc != nullptr);
+    DORIS_CHECK(has_delete_file != nullptr);
+    *has_delete_file = t_desc.paimon_params.__isset.deletion_file;
+    if (!*has_delete_file) {
+        return Status::OK();
+    }
+    const auto& dv_file = t_desc.paimon_params.deletion_file;
+    // Key layout: "paimon_dv:" + path bytes + raw bytes of the offset field.
+    const std::string prefix = "paimon_dv:";
+    desc->key.resize(prefix.size() + dv_file.path.size() + sizeof(dv_file.offset));
+    char* cursor = desc->key.data();
+    const auto append_bytes = [&cursor](const void* src, size_t len) {
+        memcpy(cursor, src, len);
+        cursor += len;
+    };
+    append_bytes(prefix.data(), prefix.size());
+    append_bytes(dv_file.path.data(), dv_file.path.size());
+    append_bytes(&dv_file.offset, sizeof(dv_file.offset));
+    desc->path = dv_file.path;
+    desc->start_offset = dv_file.offset;
+    desc->size = dv_file.length + 4;
+    desc->file_size = -1;
+    desc->format = DeleteFileDesc::Format::PAIMON;
+    return Status::OK();
+}
+
+Status PaimonHybridReader::init(format::TableReadOptions&& options) {
+    return format::TableReader::init(std::move(options)); // only base state; children are lazy
+}
+
+Status PaimonHybridReader::prepare_split(const format::SplitReadOptions& options) {
+    RETURN_IF_ERROR(_ensure_current_split_reader(options)); // selects native or JNI child
+    DORIS_CHECK(_current_split_reader != nullptr);
+    return _current_split_reader->prepare_split(options);
+}
+
+Status PaimonHybridReader::get_block(Block* block, bool* eos) {
+    DORIS_CHECK(_current_split_reader != nullptr); // set by prepare_split()
+    return _current_split_reader->get_block(block, eos);
+}
+
+// Closes whichever child readers were created. Both are always closed; the first failure (native
+// reader first, then JNI) is the status returned.
+Status PaimonHybridReader::close() {
+    Status first_error = Status::OK();
+    for (format::TableReader* child : {_native_reader.get(), _jni_reader.get()}) {
+        if (child != nullptr) {
+            if (auto child_status = child->close(); !child_status.ok() && first_error.ok()) {
+                first_error = std::move(child_status);
+            }
+        }
+    }
+    _current_split_reader = nullptr;
+    return first_error;
+}
+
+// Points _current_split_reader at the child matching the split, creating the child on first use.
+Status PaimonHybridReader::_ensure_current_split_reader(const format::SplitReadOptions& options) {
+    if (_is_jni_split(options.current_range)) {
+        DCHECK(options.current_split_format == format::FileFormat::JNI);
+        if (_jni_reader == nullptr) {
+            _jni_reader = std::make_unique<format::paimon::PaimonJniReader>();
+            RETURN_IF_ERROR(_init_child_reader(_jni_reader.get(), format::FileFormat::JNI));
+        }
+        _current_split_reader = _jni_reader.get();
+        return Status::OK();
+    }
+    format::FileFormat file_fmt;
+    RETURN_IF_ERROR(_to_file_format(options.current_range, &file_fmt));
+    DCHECK(options.current_split_format == file_fmt);
+    DCHECK(file_fmt == format::FileFormat::PARQUET || file_fmt == format::FileFormat::ORC);
+    if (_native_reader == nullptr) {
+        _native_reader = format::paimon::PaimonReader::create_unique();
+        RETURN_IF_ERROR(_init_child_reader(_native_reader.get(), file_fmt));
+    }
+    _current_split_reader = _native_reader.get();
+    return Status::OK();
+}
+
+Status PaimonHybridReader::_init_child_reader(format::TableReader* reader,
+                                              format::FileFormat file_format) {
+    DORIS_CHECK(reader != nullptr);
+    VExprContextSPtrs conjuncts; // the child receives clones, not this reader's contexts
+    RETURN_IF_ERROR(_clone_conjuncts(&conjuncts));
+    return reader->init({
+            .projected_columns = _projected_columns,
+            .column_predicates = _table_column_predicates,
+            .conjuncts = std::move(conjuncts),
+            .format = file_format,
+            .scan_params = _scan_params,
+            .io_ctx = _io_ctx,
+            .runtime_state = _runtime_state,
+            .scanner_profile = _scanner_profile,
+            .push_down_agg_type = _push_down_agg_type,
+            .condition_cache_digest = _condition_cache_digest,
+    });
+}
+
+Status PaimonHybridReader::_clone_conjuncts(VExprContextSPtrs* conjuncts) const {
+    DORIS_CHECK(conjuncts != nullptr);
+    conjuncts->clear(); // the output is rebuilt from scratch
+    conjuncts->reserve(_conjuncts.size());
+    for (const auto& source_ctx : _conjuncts) {
+        VExprSPtr cloned_root; // filled by clone_table_expr_tree
+        RETURN_IF_ERROR(format::clone_table_expr_tree(source_ctx->root(), &cloned_root));
+        conjuncts->emplace_back(VExprContext::create_shared(std::move(cloned_root)));
+    }
+    return Status::OK();
+}
+
+bool PaimonHybridReader::_is_jni_split(const TFileRangeDesc& range) {
+    const auto& paimon = range.table_format_params.paimon_params;
+    return range.__isset.table_format_params && range.table_format_params.__isset.paimon_params &&
+           paimon.__isset.reader_type && paimon.reader_type == TPaimonReaderType::PAIMON_JNI;
+}
+
+// Maps the thrift format type of a native split to format::FileFormat.
+// An unset format_type is treated as Parquet; anything other than Parquet/ORC is NotSupported.
+Status PaimonHybridReader::_to_file_format(const TFileRangeDesc& range,
+                                           format::FileFormat* file_format) {
+    DORIS_CHECK(file_format != nullptr);
+    const auto thrift_format =
+            range.__isset.format_type ? range.format_type : TFileFormatType::FORMAT_PARQUET;
+    if (thrift_format == TFileFormatType::FORMAT_PARQUET) {
+        *file_format = format::FileFormat::PARQUET;
+    } else if (thrift_format == TFileFormatType::FORMAT_ORC) {
+        *file_format = format::FileFormat::ORC;
+    } else {
+        return Status::NotSupported("Unsupported native Paimon file format {}",
+                                    to_string(thrift_format));
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format::paimon
diff --git a/be/src/format_v2/table/paimon_reader.h b/be/src/format_v2/table/paimon_reader.h
new file mode 100644
index 00000000000000..200c4e885b5055
--- /dev/null
+++ b/be/src/format_v2/table/paimon_reader.h
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "format_v2/table_reader.h"
+
+namespace doris {
+struct DeleteFileDesc;
+}
+namespace doris::format::paimon {
+
+class PaimonReader final : public format::TableReader {
+public:
+    ENABLE_FACTORY_CREATOR(PaimonReader);
+    ~PaimonReader() final = default;
+    Status prepare_split(const format::SplitReadOptions& options) override;
+
+#ifdef BE_TEST
+    void TEST_set_scan_params(TFileScanRangeParams* params) { _scan_params = params; }
+    format::TableColumnMappingMode TEST_mapping_mode() const { return mapping_mode(); }
+    Status TEST_annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) {
+        return annotate_file_schema(file_schema);
+    }
+#endif
+
+protected:
+    format::TableColumnMappingMode mapping_mode() const override;
+    Status annotate_file_schema(std::vector<format::ColumnDefinition>* file_schema) override;
+
+    Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc,
+                                       bool* has_delete_file) override;
+
+private:
+    int64_t _split_schema_id = -1; // schema id of the current split; -1 if the split has none
+};
+
+// Paimon scans can contain both native data-file splits and serialized JNI splits in the same
+// SplitSource. FileScannerV2 owns one table reader for the scanner lifetime, so this reader keeps
+// native and JNI child readers internally and dispatches each split to the matching child reader.
+class PaimonHybridReader final : public format::TableReader {
+public:
+    ~PaimonHybridReader() override = default;
+
+    Status init(format::TableReadOptions&& options) override;
+    Status prepare_split(const format::SplitReadOptions& options) override;
+    Status get_block(Block* block, bool* eos) override;
+    Status close() override; // closes both child readers and returns the first error
+
+#ifdef BE_TEST
+    static bool TEST_is_jni_split(const TFileRangeDesc& range) { return _is_jni_split(range); }
+    static Status TEST_to_file_format(const TFileRangeDesc& range,
+                                      format::FileFormat* file_format) {
+        return _to_file_format(range, file_format);
+    }
+#endif
+
+private:
+    Status _ensure_current_split_reader(const format::SplitReadOptions& options);
+    Status _init_child_reader(format::TableReader* reader, format::FileFormat file_format);
+    Status _clone_conjuncts(VExprContextSPtrs* conjuncts) const;
+    static bool _is_jni_split(const TFileRangeDesc& range);
+    static Status _to_file_format(const TFileRangeDesc& range, format::FileFormat* file_format);
+
+    std::unique_ptr<format::TableReader> _native_reader; // handle parquet/orc native splits
+    std::unique_ptr<format::TableReader> _jni_reader;    // handle serialized JNI splits
+    format::TableReader* _current_split_reader = nullptr;
+};
+
+} // namespace doris::format::paimon
diff --git a/be/src/format_v2/table/remote_doris_reader.cpp b/be/src/format_v2/table/remote_doris_reader.cpp
new file mode 100644
index 00000000000000..39580fd2561897
--- /dev/null
+++ b/be/src/format_v2/table/remote_doris_reader.cpp
@@ -0,0 +1,365 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/remote_doris_reader.h"
+
+#include <arrow/flight/client.h>
+#include <arrow/flight/types.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/cast_set.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type_serde/data_type_serde.h"
+#include "format/arrow/arrow_utils.h"
+#include "format_v2/materialized_reader_util.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "util/timezone_utils.h"
+
+namespace doris::format::remote_doris {
+namespace {
+
+Status validate_remote_doris_range(const TFileRangeDesc& range) {
+    const auto& table_params = range.table_format_params; // __isset is checked below
+    if (!range.__isset.table_format_params || table_params.table_format_type != "remote_doris") {
+        return Status::InvalidArgument("Remote Doris v2 reader requires remote_doris table format");
+    }
+    if (!table_params.__isset.remote_doris_params) {
+        return Status::InvalidArgument("Remote Doris v2 reader requires remote_doris_params");
+    }
+    const auto& remote = table_params.remote_doris_params;
+    if (!remote.__isset.location_uri || remote.location_uri.empty()) {
+        return Status::InvalidArgument("Remote Doris v2 reader requires location_uri");
+    }
+    if (!remote.__isset.ticket || remote.ticket.empty()) {
+        return Status::InvalidArgument("Remote Doris v2 reader requires ticket");
+    }
+    return Status::OK();
+}
+
+class FlightRemoteDorisStream final : public RemoteDorisStream {
+public:
+    explicit FlightRemoteDorisStream(const TFileRangeDesc& range) : _range(range) {}
+
+    Status open() { // validates the range, connects to location_uri, then issues DoGet(ticket)
+        RETURN_IF_ERROR(validate_remote_doris_range(_range));
+        const auto& params = _range.table_format_params.remote_doris_params;
+        arrow::flight::Location location;
+        RETURN_DORIS_STATUS_IF_ERROR(
+                arrow::flight::Location::Parse(params.location_uri).Value(&location));
+        arrow::flight::Ticket ticket;
+        RETURN_DORIS_STATUS_IF_ERROR(
+                arrow::flight::Ticket::Deserialize(params.ticket).Value(&ticket));
+        RETURN_DORIS_STATUS_IF_ERROR(
+                arrow::flight::FlightClient::Connect(location).Value(&_flight_client));
+        RETURN_DORIS_STATUS_IF_ERROR(_flight_client->DoGet(ticket).Value(&_stream));
+        return Status::OK();
+    }
+
+    Status next(std::shared_ptr<arrow::RecordBatch>* batch) override {
+        DORIS_CHECK(batch != nullptr);
+        arrow::flight::FlightStreamChunk chunk;
+        RETURN_DORIS_STATUS_IF_ERROR(_stream->Next().Value(&chunk));
+        *batch = chunk.data; // the caller treats a null batch as end of stream
+        return Status::OK();
+    }
+
+    Status close() override {
+        _stream.reset();
+        // Detach the client first so it is destroyed even when Close() reports an error.
+        if (auto client = std::move(_flight_client); client != nullptr) {
+            RETURN_DORIS_STATUS_IF_ERROR(client->Close());
+        }
+        return Status::OK();
+    }
+
+private:
+    const TFileRangeDesc _range; // own copy of the range passed to the constructor
+    std::unique_ptr<arrow::flight::FlightClient> _flight_client;
+    std::unique_ptr<arrow::flight::FlightStreamReader> _stream;
+};
+
+Status create_flight_stream(const TFileRangeDesc& range, std::unique_ptr<RemoteDorisStream>* out) {
+    DORIS_CHECK(out != nullptr);
+    auto stream = std::make_unique<FlightRemoteDorisStream>(range);
+    RETURN_IF_ERROR(stream->open()); // on failure *out is not assigned
+    *out = std::move(stream);
+    return Status::OK();
+}
+
+ColumnDefinition remote_doris_child_definition(const std::string& name, DataTypePtr type,
+                                               int32_t local_id);
+
+// Builds structural child definitions for a complex type: "element" for arrays, "key" and
+// "value" for maps, and one child per field for structs. Any other type yields no children.
+// A Nullable wrapper is stripped before the type is inspected.
+std::vector<ColumnDefinition> synthesize_remote_doris_children(const DataTypePtr& type) {
+    DORIS_CHECK(type != nullptr);
+    std::vector<ColumnDefinition> children;
+    const auto bare_type = remove_nullable(type);
+    const auto primitive = bare_type->get_primitive_type();
+
+    if (primitive == TYPE_ARRAY) {
+        const auto* as_array = assert_cast<const DataTypeArray*>(bare_type.get());
+        children.push_back(
+                remote_doris_child_definition("element", as_array->get_nested_type(), 0));
+    } else if (primitive == TYPE_MAP) {
+        // Map children use local ids 0 (key) and 1 (value).
+        const auto* as_map = assert_cast<const DataTypeMap*>(bare_type.get());
+        children.push_back(remote_doris_child_definition("key", as_map->get_key_type(), 0));
+        children.push_back(remote_doris_child_definition("value", as_map->get_value_type(), 1));
+    } else if (primitive == TYPE_STRUCT) {
+        // Struct children keep their field names; the local id is the field position.
+        const auto* as_struct = assert_cast<const DataTypeStruct*>(bare_type.get());
+        const size_t field_count = as_struct->get_elements().size();
+        children.reserve(field_count);
+        for (size_t field_idx = 0; field_idx < field_count; ++field_idx) {
+            children.push_back(remote_doris_child_definition(
+                    as_struct->get_element_name(field_idx), as_struct->get_element(field_idx),
+                    cast_set<int32_t>(field_idx)));
+        }
+    }
+
+    return children;
+}
+
+ColumnDefinition remote_doris_child_definition(const std::string& name, DataTypePtr type,
+                                               int32_t local_id) {
+    ColumnDefinition definition;
+    definition.name = name;
+    definition.identifier = Field::create_field<TYPE_STRING>(name);
+    definition.local_id = local_id;
+    definition.children = synthesize_remote_doris_children(type); // recurses for nested types
+    definition.type = std::move(type);
+    return definition;
+}
+
+} // namespace
+
+RemoteDorisFileReader::RemoteDorisFileReader(
+        std::shared_ptr<io::FileSystemProperties>& system_properties,
+        std::unique_ptr<io::FileDescription>& file_description,
+        std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile, const TFileRangeDesc& range,
+        const std::vector<SlotDescriptor*>& file_slot_descs,
+        RemoteDorisStreamFactory stream_factory)
+        : FileReader(system_properties, file_description, std::move(io_ctx), profile),
+          _range(range),
+          _file_slot_descs(file_slot_descs),
+          _stream_factory(std::move(stream_factory)) { // empty => Arrow Flight in _open_stream
+    TimezoneUtils::find_cctz_time_zone(TimezoneUtils::default_time_zone, _ctz);
+}
+
+RemoteDorisFileReader::~RemoteDorisFileReader() {
+    static_cast<void>(close()); // a destructor cannot report the close status
+}
+
+Status RemoteDorisFileReader::init(RuntimeState* state) {
+    (void)state; // not used by this reader
+    RETURN_IF_ERROR(validate_remote_doris_range(_range));
+    RETURN_IF_ERROR(_build_col_name_to_file_id());
+    _eof = false;
+    return Status::OK();
+}
+
+Status RemoteDorisFileReader::get_schema(std::vector<ColumnDefinition>* file_schema) const {
+    DORIS_CHECK(file_schema != nullptr);
+    file_schema->clear();
+    file_schema->reserve(_file_slot_descs.size());
+    for (size_t slot_idx = 0; slot_idx < _file_slot_descs.size(); ++slot_idx) {
+        const auto* slot_desc = _file_slot_descs[slot_idx];
+        DORIS_CHECK(slot_desc != nullptr);
+        const auto file_id = cast_set<int32_t>(slot_idx); // slot position is the file column id
+        file_schema->push_back({
+                .identifier = Field::create_field<TYPE_INT>(file_id),
+                .local_id = file_id,
+                .name = slot_desc->col_name(),
+                .type = slot_desc->type(),
+                // Structural children let TableColumnMapper validate and project complex slots.
+                .children = synthesize_remote_doris_children(slot_desc->type()),
+        });
+    }
+    return Status::OK();
+}
+
+Status RemoteDorisFileReader::open(std::shared_ptr<FileScanRequest> request) {
+    RETURN_IF_ERROR(FileReader::open(std::move(request)));
+    RETURN_IF_ERROR(_open_stream()); // injected factory, else Arrow Flight
+    _eof = false;
+    return Status::OK();
+}
+
+Status RemoteDorisFileReader::get_block(Block* file_block, size_t* rows, bool* eof) {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    DORIS_CHECK(eof != nullptr);
+    if (_stream == nullptr) {
+        return Status::InternalError("Remote Doris v2 reader is not open");
+    }
+
+    *rows = 0; // outputs are reset before anything that can fail
+    *eof = false;
+    std::shared_ptr<arrow::RecordBatch> next_batch;
+    RETURN_IF_ERROR(_stream->next(&next_batch));
+    if (!next_batch) { // a null batch marks the end of the stream
+        _eof = true;
+        *eof = true;
+        return Status::OK();
+    }
+
+    RETURN_IF_ERROR(_materialize_record_batch(*next_batch, file_block, rows));
+    RETURN_IF_ERROR(
+            apply_materialized_reader_filters(_request.get(), _io_ctx.get(), file_block, rows));
+    _reader_statistics.read_rows += *rows; // counted after apply_materialized_reader_filters
+    return Status::OK();
+}
+
+Status RemoteDorisFileReader::close() {
+    // Release the stream and request unconditionally. Returning early on a failed stream close
+    // would leave _stream set, and a later open() asserts DORIS_CHECK(_stream == nullptr).
+    Status close_status = _stream != nullptr ? _stream->close() : Status::OK();
+    _stream.reset();
+    _request.reset();
+    _eof = true;
+    return close_status;
+}
+
+Status RemoteDorisFileReader::_open_stream() {
+    DORIS_CHECK(_stream == nullptr);
+    // An injected factory takes precedence (tests use it to supply RecordBatch-backed streams);
+    // without one, the stream is opened over Arrow Flight.
+    const Status open_status = _stream_factory ? _stream_factory(_range, &_stream)
+                                               : create_flight_stream(_range, &_stream);
+    RETURN_IF_ERROR(open_status);
+    DORIS_CHECK(_stream != nullptr);
+    return Status::OK();
+}
+
+Status RemoteDorisFileReader::_materialize_record_batch(const arrow::RecordBatch& batch,
+                                                        Block* file_block, size_t* rows) const {
+    DORIS_CHECK(file_block != nullptr);
+    DORIS_CHECK(rows != nullptr);
+    if (_request == nullptr) {
+        return Status::InternalError("Remote Doris v2 reader is not open");
+    }
+
+    // Pass 1: copy every Arrow column that the request asked for into its block position.
+    std::vector<bool> filled(file_block->columns(), false);
+    for (int col = 0; col < batch.num_columns(); ++col) {
+        const std::string& arrow_name = batch.schema()->field(col)->name();
+        const auto id_it = _col_name_to_file_id.find(arrow_name);
+        if (id_it == _col_name_to_file_id.end()) {
+            return Status::InternalError("Remote Doris returned unknown column {}", arrow_name);
+        }
+        const auto wanted_it = _request->local_positions.find(id_it->second);
+        if (wanted_it != _request->local_positions.end()) {
+            RETURN_IF_ERROR(_materialize_arrow_column(batch, col, id_it->second,
+                                                      wanted_it->second, file_block));
+            filled[wanted_it->second.value()] = true;
+        }
+    }
+
+    // Pass 2: every requested position must be in range and must have been filled above.
+    for (const auto& [wanted_id, wanted_pos] : _request->local_positions) {
+        if (wanted_pos.value() >= filled.size()) {
+            return Status::InternalError(
+                    "Remote Doris requested block position {} out of range, block columns {}",
+                    wanted_pos.value(), filled.size());
+        }
+        if (!filled[wanted_pos.value()]) {
+            return Status::InternalError("Remote Doris did not return requested file column id {}",
+                                         wanted_id.value());
+        }
+    }
+    *rows = cast_set<size_t>(batch.num_rows());
+    return Status::OK();
+}
+
+Status RemoteDorisFileReader::_materialize_arrow_column(const arrow::RecordBatch& batch,
+                                                        int arrow_column_idx,
+                                                        LocalColumnId file_column_id,
+                                                        const LocalIndex& block_position,
+                                                        Block* file_block) const {
+    DORIS_CHECK(file_block != nullptr);
+    if (block_position.value() >= file_block->columns()) {
+        return Status::InternalError(
+                "Remote Doris block position {} out of range, block columns {}",
+                block_position.value(), file_block->columns());
+    }
+    const auto column_name = batch.schema()->field(arrow_column_idx)->name(); // for error text
+    auto columns_guard = file_block->mutate_columns_scoped();
+    auto& columns = columns_guard.mutable_columns();
+    try { // an Exception thrown during conversion is turned into an InternalError status
+        RETURN_IF_ERROR(columns_guard.get_datatype_by_position(block_position.value())
+                                ->get_serde()
+                                ->read_column_from_arrow(*columns[block_position.value()],
+                                                         batch.column(arrow_column_idx).get(), 0,
+                                                         batch.num_rows(), _ctz));
+    } catch (const Exception& e) {
+        return Status::InternalError(
+                "Failed to convert Remote Doris Arrow column '{}' (file_column_id={}) to Doris "
+                "block: {}",
+                column_name, file_column_id.value(), e.what());
+    }
+    return Status::OK();
+}
+
+Status RemoteDorisFileReader::_build_col_name_to_file_id() {
+    _col_name_to_file_id.clear();
+    _col_name_to_file_id.reserve(_file_slot_descs.size());
+    for (size_t idx = 0; idx < _file_slot_descs.size(); ++idx) { // idx is the file column id
+        const auto* slot = _file_slot_descs[idx];
+        DORIS_CHECK(slot != nullptr);
+        _col_name_to_file_id.emplace(slot->col_name(), LocalColumnId(cast_set<int32_t>(idx)));
+    }
+    return Status::OK();
+}
+
+RemoteDorisReader::RemoteDorisReader(RemoteDorisStreamFactory stream_factory)
+        : _stream_factory(std::move(stream_factory)) {} // handed to each file reader created
+
+Status RemoteDorisReader::init(TableReadOptions&& options) {
+    if (options.file_slot_descs == nullptr) { // validated before options is moved
+        return Status::InvalidArgument("Remote Doris v2 reader requires file slot descriptors");
+    }
+    return TableReader::init(std::move(options));
+}
+
+Status RemoteDorisReader::prepare_split(const SplitReadOptions& options) {
+    RETURN_IF_ERROR(validate_remote_doris_range(options.current_range)); // fail fast
+    return TableReader::prepare_split(options);
+}
+
+Status RemoteDorisReader::create_file_reader(std::unique_ptr<FileReader>* reader) {
+    DORIS_CHECK(reader != nullptr);
+    DORIS_CHECK(_file_slot_descs != nullptr); // init() rejects a null options.file_slot_descs
+    *reader = std::make_unique<RemoteDorisFileReader>(
+            _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+            _current_file_range_desc, *_file_slot_descs, _stream_factory);
+    return Status::OK();
+}
+
+} // namespace doris::format::remote_doris
diff --git a/be/src/format_v2/table/remote_doris_reader.h b/be/src/format_v2/table/remote_doris_reader.h
new file mode 100644
index 00000000000000..b4dd2a505a95ad
--- /dev/null
+++ b/be/src/format_v2/table/remote_doris_reader.h
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <arrow/record_batch.h>
+#include <cctz/time_zone.h>
+
+#include <cstddef>
+#include <functional>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris {
+class Block;
+class RuntimeProfile;
+class RuntimeState;
+class SlotDescriptor;
+} // namespace doris
+
+namespace doris::format::remote_doris {
+
+// Small abstraction around Arrow Flight to keep Remote Doris v2 reader unit-testable without
+// starting a Flight server. Production code uses FlightRemoteDorisStream; tests can provide
+// RecordBatch-backed streams that exercise the same FileReader block materialization path.
+class RemoteDorisStream {
+public:
+    virtual ~RemoteDorisStream() = default;
+    // Hands the next Arrow record batch back through `batch`; failures are reported via the
+    // returned Status.
+    // NOTE(review): the end-of-stream convention (presumably a null batch) is defined by the
+    // implementations, not by this interface -- confirm against FlightRemoteDorisStream.
+    virtual Status next(std::shared_ptr<arrow::RecordBatch>* batch) = 0;
+    // Shuts the stream down; failures are reported via the returned Status.
+    virtual Status close() = 0;
+};
+
+// Callback that creates the stream for one file range: it receives the range descriptor and
+// returns the new stream through the out-parameter, reporting failures via Status.
+using RemoteDorisStreamFactory =
+        std::function<Status(const TFileRangeDesc&, std::unique_ptr<RemoteDorisStream>*)>;
+
+// FileReader implementation for Remote Doris splits. Arrow record batches are obtained from a
+// RemoteDorisStream and materialized, column by column, into the Doris file block.
+class RemoteDorisFileReader final : public FileReader {
+public:
+    // `range` and `file_slot_descs` are stored by value in the reader. `stream_factory` may be
+    // left empty.
+    // NOTE(review): presumably an empty factory selects the production Flight-backed stream --
+    // confirm in remote_doris_reader.cpp.
+    RemoteDorisFileReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                          std::unique_ptr<io::FileDescription>& file_description,
+                          std::shared_ptr<io::IOContext> io_ctx, RuntimeProfile* profile,
+                          const TFileRangeDesc& range,
+                          const std::vector<SlotDescriptor*>& file_slot_descs,
+                          RemoteDorisStreamFactory stream_factory = {});
+    ~RemoteDorisFileReader() override;
+
+    // FileReader interface.
+    Status init(RuntimeState* state) override;
+    Status get_schema(std::vector<ColumnDefinition>* file_schema) const override;
+    Status open(std::shared_ptr<FileScanRequest> request) override;
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override;
+    Status close() override;
+
+private:
+    // Opens _stream. NOTE(review): presumably through _stream_factory when one was supplied --
+    // confirm in the implementation.
+    Status _open_stream();
+    // Converts one Arrow record batch into `file_block` and reports a row count through `rows`.
+    Status _materialize_record_batch(const arrow::RecordBatch& batch, Block* file_block,
+                                     size_t* rows) const;
+    // Converts the Arrow column at `arrow_column_idx` of `batch` into the column of `file_block`
+    // addressed by `block_position`; `file_column_id` identifies the file column being filled.
+    Status _materialize_arrow_column(const arrow::RecordBatch& batch, int arrow_column_idx,
+                                     LocalColumnId file_column_id, const LocalIndex& block_position,
+                                     Block* file_block) const;
+    // Fills _col_name_to_file_id: slot column name -> position of the slot in _file_slot_descs.
+    Status _build_col_name_to_file_id();
+
+    // Private copy of the range descriptor passed to the constructor.
+    const TFileRangeDesc _range;
+    // Private copy of the slot descriptor list; the descriptors themselves are not owned.
+    const std::vector<SlotDescriptor*> _file_slot_descs;
+    RemoteDorisStreamFactory _stream_factory;
+    // NOTE(review): presumably the session time zone used when converting Arrow temporal
+    // values -- confirm where it is assigned.
+    cctz::time_zone _ctz;
+    std::unique_ptr<RemoteDorisStream> _stream;
+    std::unordered_map<std::string, LocalColumnId> _col_name_to_file_id;
+};
+
+// TableReader for Remote Doris scans. It validates its options and splits, then creates one
+// RemoteDorisFileReader per split.
+class RemoteDorisReader final : public TableReader {
+public:
+    // `stream_factory` is passed on to every RemoteDorisFileReader created by this reader; the
+    // default is an empty factory.
+    explicit RemoteDorisReader(RemoteDorisStreamFactory stream_factory = {});
+
+    // Fails with InvalidArgument when the options carry no file slot descriptors.
+    Status init(TableReadOptions&& options) override;
+    // Validates the split's range before delegating to TableReader::prepare_split().
+    Status prepare_split(const SplitReadOptions& options) override;
+
+protected:
+    // Creates the RemoteDorisFileReader for the current split.
+    Status create_file_reader(std::unique_ptr<FileReader>* reader) override;
+
+private:
+    RemoteDorisStreamFactory _stream_factory;
+};
+
+} // namespace doris::format::remote_doris
diff --git a/be/src/format_v2/table/schema_history_util.cpp b/be/src/format_v2/table/schema_history_util.cpp
new file mode 100644
index 00000000000000..10109839e6987d
--- /dev/null
+++ b/be/src/format_v2/table/schema_history_util.cpp
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/schema_history_util.h"
+
+#include <algorithm>
+#include <ranges>
+#include <string>
+
+#include "core/field.h"
+#include "util/string_util.h"
+
+namespace doris::format {
+namespace {
+
+// Unwraps a thrift TFieldPtr: yields the pointed-to TField, or nullptr when the pointer is not
+// marked as set or is empty.
+const schema::external::TField* get_field_ptr(const schema::external::TFieldPtr& field_ptr) {
+    const bool has_field = field_ptr.__isset.field_ptr && field_ptr.field_ptr != nullptr;
+    return has_field ? field_ptr.field_ptr.get() : nullptr;
+}
+
+// Returns the first usable entry of `fields` whose name, or any of its name_mapping aliases,
+// equals `name` when compared case-insensitively. Entries whose pointer is unset or null are
+// skipped. Returns nullptr when nothing matches.
+const schema::external::TField* find_child_field_by_name(
+        const std::vector<schema::external::TFieldPtr>& fields, const std::string& name) {
+    // The search key does not change while scanning, so lower-case it once instead of once per
+    // field and once per alias.
+    const std::string lowered_name = to_lower(name);
+    const auto matches = [&lowered_name](const std::string& candidate) {
+        return to_lower(candidate) == lowered_name;
+    };
+    for (const auto& field_ptr : fields) {
+        const auto* field = get_field_ptr(field_ptr);
+        if (field == nullptr) {
+            continue;
+        }
+        if (field->__isset.name && matches(field->name)) {
+            return field;
+        }
+        if (field->__isset.name_mapping && std::ranges::any_of(field->name_mapping, matches)) {
+            return field;
+        }
+    }
+    return nullptr;
+}
+
+void annotate_column_from_field(ColumnDefinition* column, const schema::external::TField& field);
+
+// Annotates every child of `column` from the field of `struct_field` that matches the child's
+// name (see find_child_field_by_name). Children without a matching field are left untouched,
+// and nothing happens when the struct carries no field list.
+void annotate_struct_children(ColumnDefinition* column,
+                              const schema::external::TStructField& struct_field) {
+    DORIS_CHECK(column != nullptr);
+    if (struct_field.__isset.fields) {
+        for (auto& child : column->children) {
+            const auto* matched = find_child_field_by_name(struct_field.fields, child.name);
+            if (matched == nullptr) {
+                continue;
+            }
+            annotate_column_from_field(&child, *matched);
+        }
+    }
+}
+
+// Copies the field id and name aliases of `field` onto `column`, then recurses into the nested
+// children:
+//   - struct: children are matched by name via annotate_struct_children();
+//   - array:  the item field annotates children[0];
+//   - map:    the key field annotates children[0], the value field annotates children[1].
+// A nested child is only visited when the column has a child at that position and the thrift
+// field is set and non-null.
+void annotate_column_from_field(ColumnDefinition* column, const schema::external::TField& field) {
+    DORIS_CHECK(column != nullptr);
+    if (field.__isset.id) {
+        column->identifier = Field::create_field<TYPE_INT>(field.id);
+    }
+    // An unset name_mapping drops whatever aliases the column carried before.
+    if (field.__isset.name_mapping) {
+        column->name_mapping = field.name_mapping;
+    } else {
+        column->name_mapping.clear();
+    }
+    if (!field.__isset.nestedField) {
+        return;
+    }
+
+    const auto& nested = field.nestedField;
+    // Recurses into children[child_idx] when that child exists and `child_ptr` is usable.
+    const auto annotate_child = [column](size_t child_idx,
+                                         const schema::external::TFieldPtr& child_ptr) {
+        if (child_idx >= column->children.size()) {
+            return;
+        }
+        const auto* child_field = get_field_ptr(child_ptr);
+        if (child_field != nullptr) {
+            annotate_column_from_field(&column->children[child_idx], *child_field);
+        }
+    };
+
+    if (nested.__isset.struct_field) {
+        annotate_struct_children(column, nested.struct_field);
+    } else if (nested.__isset.array_field) {
+        if (nested.array_field.__isset.item_field) {
+            annotate_child(0, nested.array_field.item_field);
+        }
+    } else if (nested.__isset.map_field) {
+        if (nested.map_field.__isset.key_field) {
+            annotate_child(0, nested.map_field.key_field);
+        }
+        if (nested.map_field.__isset.value_field) {
+            annotate_child(1, nested.map_field.value_field);
+        }
+    }
+}
+
+} // namespace
+
+// Returns the first entry of params->history_schema_info whose schema_id is set and equals
+// `schema_id`. Returns nullptr when `params` is null, carries no history schema info, or has no
+// such entry. The result points into storage owned by `params`.
+const schema::external::TSchema* find_history_schema(const TFileScanRangeParams* params,
+                                                     int64_t schema_id) {
+    if (params == nullptr || !params->__isset.history_schema_info) {
+        return nullptr;
+    }
+    const auto& history = params->history_schema_info;
+    const auto found = std::ranges::find_if(history, [schema_id](const auto& candidate) {
+        return candidate.__isset.schema_id && candidate.schema_id == schema_id;
+    });
+    return found == history.end() ? nullptr : &*found;
+}
+
+// True when a split written with `split_schema_id` can be resolved through history schemas: the
+// id is non-negative, `params` carries both current_schema_id and history_schema_info, and the
+// history contains an entry for that id.
+bool can_map_by_history_schema(const TFileScanRangeParams* params, int64_t split_schema_id) {
+    if (split_schema_id < 0 || params == nullptr) {
+        return false;
+    }
+    const bool has_schema_metadata =
+            params->__isset.current_schema_id && params->__isset.history_schema_info;
+    return has_schema_metadata && find_history_schema(params, split_schema_id) != nullptr;
+}
+
+// Annotates every top-level column of `file_schema` from the root field of the history schema
+// `split_schema_id` that matches the column's name (see find_child_field_by_name); unmatched
+// columns stay as they are. `file_schema` must be non-null and the history entry must exist;
+// both are enforced with DORIS_CHECK. A schema without root fields changes nothing. Always
+// returns OK.
+Status annotate_file_schema_from_history(const TFileScanRangeParams* params,
+                                         int64_t split_schema_id,
+                                         std::vector<ColumnDefinition>* file_schema) {
+    DORIS_CHECK(file_schema != nullptr);
+    const auto* schema = find_history_schema(params, split_schema_id);
+    DORIS_CHECK(schema != nullptr);
+    const bool has_root_fields =
+            schema->__isset.root_field && schema->root_field.__isset.fields;
+    if (has_root_fields) {
+        const auto& root_fields = schema->root_field.fields;
+        for (auto& column : *file_schema) {
+            const auto* matched = find_child_field_by_name(root_fields, column.name);
+            if (matched == nullptr) {
+                continue;
+            }
+            annotate_column_from_field(&column, *matched);
+        }
+    }
+    return Status::OK();
+}
+
+} // namespace doris::format
diff --git a/be/src/format_v2/table/schema_history_util.h b/be/src/format_v2/table/schema_history_util.h
new file mode 100644
index 00000000000000..3c4a80b5d4c975
--- /dev/null
+++ b/be/src/format_v2/table/schema_history_util.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "common/status.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/ExternalTableSchema_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format {
+
+// Returns the entry of params->history_schema_info whose schema_id equals `schema_id`, or
+// nullptr when `params` is null, has no history schema info, or contains no such entry. The
+// returned pointer refers to storage owned by `params`.
+const schema::external::TSchema* find_history_schema(const TFileScanRangeParams* params,
+                                                     int64_t schema_id);
+
+// Reports whether a split written with `split_schema_id` can be mapped through history schemas:
+// the id must be non-negative, `params` must carry both current_schema_id and
+// history_schema_info, and the history must contain an entry for that id.
+bool can_map_by_history_schema(const TFileScanRangeParams* params, int64_t split_schema_id);
+
+// Annotate a file-local schema with the field ids and name mappings from the historical table
+// schema that describes the current split. TableReader has already annotated projected table
+// columns from current_schema_id; this function performs the symmetric annotation for the file
+// schema so TableColumnMapper can match evolved Hudi/Paimon files by field id.
+//
+// File columns are matched to history fields by case-insensitive name or name-mapping alias;
+// columns without a match are left unchanged. `file_schema` must be non-null and a history
+// entry for `split_schema_id` must exist (the implementation enforces both with DORIS_CHECK),
+// so callers should gate this call with can_map_by_history_schema().
+Status annotate_file_schema_from_history(const TFileScanRangeParams* params,
+                                         int64_t split_schema_id,
+                                         std::vector<ColumnDefinition>* file_schema);
+
+} // namespace doris::format
diff --git a/be/src/format_v2/table_reader.cpp b/be/src/format_v2/table_reader.cpp
new file mode 100644
index 00000000000000..87d222c052f0a9
--- /dev/null
+++ b/be/src/format_v2/table_reader.cpp
@@ -0,0 +1,847 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table_reader.h"
+
+#include <gen_cpp/ExternalTableSchema_types.h>
+#include <gen_cpp/PlanNodes_types.h>
+#include <gen_cpp/Types_types.h>
+
+#include <algorithm>
+#include <cstring>
+#include <ranges>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <utility>
+#include <vector>
+
+#include "common/cast_set.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_struct.h"
+#include "exec/common/endian.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vslot_ref.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/delimited_text/csv_reader.h"
+#include "format_v2/delimited_text/text_reader.h"
+#include "format_v2/json/json_reader.h"
+#include "format_v2/native/native_reader.h"
+#include "format_v2/parquet/parquet_reader.h"
+#include "roaring/roaring64map.hh"
+#include "storage/segment/condition_cache.h"
+#include "util/string_util.h"
+
+namespace doris::format {
+namespace {
+
+// Renders `values` as "[a, b, c]", converting every element with `formatter`. The formatter's
+// result must be streamable into a std::ostream. An empty vector yields "[]".
+template <typename T, typename Formatter>
+std::string join_table_reader_debug_strings(const std::vector<T>& values, Formatter formatter) {
+    std::ostringstream out;
+    out << "[";
+    const char* separator = "";
+    for (const auto& value : values) {
+        out << separator << formatter(value);
+        separator = ", ";
+    }
+    out << "]";
+    return out.str();
+}
+
+// Maps a FileFormat enumerator to its upper-case name for debug output. A value that matches
+// none of the listed enumerators yields "UNKNOWN".
+std::string file_format_to_string(FileFormat format) {
+    const char* label = "UNKNOWN";
+    switch (format) {
+    case FileFormat::PARQUET:
+        label = "PARQUET";
+        break;
+    case FileFormat::ORC:
+        label = "ORC";
+        break;
+    case FileFormat::CSV:
+        label = "CSV";
+        break;
+    case FileFormat::JSON:
+        label = "JSON";
+        break;
+    case FileFormat::TEXT:
+        label = "TEXT";
+        break;
+    case FileFormat::JNI:
+        label = "JNI";
+        break;
+    case FileFormat::NATIVE:
+        label = "NATIVE";
+        break;
+    case FileFormat::ARROW:
+        label = "ARROW";
+        break;
+    }
+    return label;
+}
+
+// Maps a TPushAggOp value to its enumerator name for debug output. A value that matches none of
+// the listed enumerators yields "UNKNOWN".
+std::string push_down_agg_to_string(TPushAggOp::type op) {
+    const char* label = "UNKNOWN";
+    switch (op) {
+    case TPushAggOp::NONE:
+        label = "NONE";
+        break;
+    case TPushAggOp::COUNT:
+        label = "COUNT";
+        break;
+    case TPushAggOp::MINMAX:
+        label = "MINMAX";
+        break;
+    case TPushAggOp::MIX:
+        label = "MIX";
+        break;
+    case TPushAggOp::COUNT_ON_INDEX:
+        label = "COUNT_ON_INDEX";
+        break;
+    }
+    return label;
+}
+
+// Describes the data file of `task` for debug output. Yields "null" when there is no task or
+// the task has no data file.
+std::string current_file_debug_string(const std::unique_ptr<ScanTask>& task) {
+    const bool has_file = task != nullptr && task->data_file != nullptr;
+    if (!has_file) {
+        return "null";
+    }
+    const auto& file = *task->data_file;
+    std::ostringstream out;
+    out << "FileDescription{path=" << file.path;
+    out << ", file_size=" << file.file_size;
+    out << ", range_start_offset=" << file.range_start_offset;
+    out << ", range_size=" << file.range_size;
+    out << ", mtime=" << file.mtime;
+    out << ", fs_name=" << file.fs_name;
+    out << ", file_cache_admission=" << file.file_cache_admission;
+    out << "}";
+    return out.str();
+}
+
+// Lists the partition keys as "{k1, k2}" in map order. Only the keys are printed; the values
+// are left out.
+std::string partition_values_debug_string(const std::map<std::string, Field>& partition_values) {
+    std::ostringstream out;
+    out << "{";
+    const char* separator = "";
+    for (const auto& entry : partition_values) {
+        out << separator << entry.first;
+        separator = ", ";
+    }
+    out << "}";
+    return out.str();
+}
+
+// Returns the TField behind a thrift TFieldPtr, or nullptr when the pointer is not marked as
+// set or holds nothing.
+const schema::external::TField* get_field_ptr(const schema::external::TFieldPtr& field_ptr) {
+    if (field_ptr.__isset.field_ptr && field_ptr.field_ptr != nullptr) {
+        return field_ptr.field_ptr.get();
+    }
+    return nullptr;
+}
+
+// True when `name` equals the field's name or any of its name_mapping aliases, compared
+// case-insensitively. An unset name or unset name_mapping never matches.
+bool external_field_matches_name(const schema::external::TField& field, const std::string& name) {
+    // Lower-case the search key once; it used to be recomputed for every alias.
+    const std::string lowered_name = to_lower(name);
+    const auto matches = [&lowered_name](const std::string& candidate) {
+        return to_lower(candidate) == lowered_name;
+    };
+    if (field.__isset.name && matches(field.name)) {
+        return true;
+    }
+    return field.__isset.name_mapping && std::ranges::any_of(field.name_mapping, matches);
+}
+
+// Returns the type of the first element of `struct_type` whose element name matches `field`
+// (see external_field_matches_name), or nullptr when no element matches.
+DataTypePtr find_struct_child_type_by_external_field(const DataTypeStruct& struct_type,
+                                                     const schema::external::TField& field) {
+    const size_t element_count = struct_type.get_elements().size();
+    for (size_t position = 0; position != element_count; ++position) {
+        const auto& element_name = struct_type.get_element_name(position);
+        if (!external_field_matches_name(field, element_name)) {
+            continue;
+        }
+        return struct_type.get_element(position);
+    }
+    return nullptr;
+}
+
+// Builds a ColumnDefinition for the external schema field `field`, typed with `type`, and
+// recursively adds child definitions for nested struct/array/map types.
+//
+// The identifier is the field id when set, otherwise a default-constructed Field. Name and
+// name_mapping are copied when set. Children are only built when `type` is non-null and the
+// field carries nested-field metadata that matches the (nullable-stripped) type:
+//   - STRUCT: one child per external child field that has a name and whose name matches an
+//     element of the struct type; other external children are skipped.
+//   - ARRAY:  a single child named "element".
+//   - MAP:    children named "key" and "value"; both key_field and value_field must be set,
+//     and a null key or value pointer simply omits that child.
+// If the metadata is missing or inconsistent the column is returned without children.
+ColumnDefinition build_schema_column_from_external_field(const schema::external::TField& field,
+                                                         DataTypePtr type) {
+    ColumnDefinition column {
+            .identifier = field.__isset.id ? Field::create_field<TYPE_INT>(field.id) : Field {},
+            .name = field.__isset.name ? field.name : "",
+            .name_mapping =
+                    field.__isset.name_mapping ? field.name_mapping : std::vector<std::string> {},
+            .type = std::move(type),
+            .children = {},
+            .default_expr = nullptr,
+            .is_partition_key = false,
+    };
+    if (column.type == nullptr || !field.__isset.nestedField) {
+        return column;
+    }
+
+    // Dispatch on the underlying type; a Nullable wrapper is stripped first.
+    const auto nested_type = remove_nullable(column.type);
+    switch (nested_type->get_primitive_type()) {
+    case TYPE_STRUCT: {
+        if (!field.nestedField.__isset.struct_field ||
+            !field.nestedField.struct_field.__isset.fields) {
+            return column;
+        }
+        const auto& struct_type = assert_cast<const DataTypeStruct&>(*nested_type);
+        for (const auto& child_ptr : field.nestedField.struct_field.fields) {
+            const auto* child_field = get_field_ptr(child_ptr);
+            if (child_field == nullptr || !child_field->__isset.name) {
+                continue;
+            }
+            // External children that have no counterpart in the Doris struct type are dropped.
+            auto child_type = find_struct_child_type_by_external_field(struct_type, *child_field);
+            if (child_type == nullptr) {
+                continue;
+            }
+            column.children.push_back(
+                    build_schema_column_from_external_field(*child_field, child_type));
+        }
+        break;
+    }
+    case TYPE_ARRAY: {
+        if (!field.nestedField.__isset.array_field ||
+            !field.nestedField.array_field.__isset.item_field) {
+            return column;
+        }
+        const auto* item_field = get_field_ptr(field.nestedField.array_field.item_field);
+        if (item_field == nullptr) {
+            return column;
+        }
+        const auto& array_type = assert_cast<const DataTypeArray&>(*nested_type);
+        auto child =
+                build_schema_column_from_external_field(*item_field, array_type.get_nested_type());
+        // The array item is always exposed under the fixed name "element".
+        child.name = "element";
+        // NOTE(review): has_identifier_name() presumably reports a name-based identifier; if so
+        // the identifier is re-derived here so it follows the renamed child -- confirm.
+        if (child.has_identifier_name()) {
+            child.identifier = Field::create_field<TYPE_STRING>(child.name);
+        }
+        column.children.push_back(std::move(child));
+        break;
+    }
+    case TYPE_MAP: {
+        if (!field.nestedField.__isset.map_field ||
+            !field.nestedField.map_field.__isset.key_field ||
+            !field.nestedField.map_field.__isset.value_field) {
+            return column;
+        }
+        const auto& map_type = assert_cast<const DataTypeMap&>(*nested_type);
+        const auto* key_field = get_field_ptr(field.nestedField.map_field.key_field);
+        if (key_field != nullptr) {
+            auto child =
+                    build_schema_column_from_external_field(*key_field, map_type.get_key_type());
+            // Map children use the fixed names "key" and "value", with the same identifier
+            // handling as the array "element" child.
+            child.name = "key";
+            if (child.has_identifier_name()) {
+                child.identifier = Field::create_field<TYPE_STRING>(child.name);
+            }
+            column.children.push_back(std::move(child));
+        }
+        const auto* value_field = get_field_ptr(field.nestedField.map_field.value_field);
+        if (value_field != nullptr) {
+            auto child = build_schema_column_from_external_field(*value_field,
+                                                                 map_type.get_value_type());
+            child.name = "value";
+            if (child.has_identifier_name()) {
+                child.identifier = Field::create_field<TYPE_STRING>(child.name);
+            }
+            column.children.push_back(std::move(child));
+        }
+        break;
+    }
+    default:
+        // Other types get no children.
+        break;
+    }
+    return column;
+}
+
+// Finds the root field of the external table schema that matches `column` by name or alias
+// (see external_field_matches_name). The schema consulted is the history entry for
+// current_schema_id when that id is set and present, otherwise the first history entry.
+// Returns nullptr when there is no history, the schema has no root fields, or nothing matches.
+const schema::external::TField* find_external_root_field(const TFileScanRangeParams* params,
+                                                         const ColumnDefinition& column) {
+    if (params == nullptr || !params->__isset.history_schema_info) {
+        return nullptr;
+    }
+    const auto& history = params->history_schema_info;
+    if (history.empty()) {
+        return nullptr;
+    }
+
+    // Start from the first entry and switch to the current schema when it can be found.
+    const auto* schema = &history.front();
+    if (params->__isset.current_schema_id) {
+        const auto current = std::ranges::find_if(history, [params](const auto& candidate) {
+            return candidate.__isset.schema_id &&
+                   candidate.schema_id == params->current_schema_id;
+        });
+        if (current != history.end()) {
+            schema = &*current;
+        }
+    }
+
+    if (!schema->__isset.root_field || !schema->root_field.__isset.fields) {
+        return nullptr;
+    }
+    for (const auto& field_ptr : schema->root_field.fields) {
+        const auto* field = get_field_ptr(field_ptr);
+        if (field != nullptr && external_field_matches_name(*field, column.name)) {
+            return field;
+        }
+    }
+    return nullptr;
+}
+
+// Describes an expression context for debug output: "null" for a null context,
+// "VExprContext{root=null}" for a context without a root, otherwise the root's name and debug
+// string.
+std::string expr_context_debug_string(const VExprContextSPtr& context) {
+    if (!context) {
+        return "null";
+    }
+    const auto root = context->root();
+    if (!root) {
+        return "VExprContext{root=null}";
+    }
+    std::ostringstream out;
+    out << "VExprContext{root_name=" << root->expr_name();
+    out << ", root_debug=" << root->debug_string();
+    out << "}";
+    return out.str();
+}
+
+// Describes a TableFilter for debug output: its conjunct followed by the list of global
+// indices it references.
+std::string table_filter_debug_string(const TableFilter& filter) {
+    const std::string conjunct_text = expr_context_debug_string(filter.conjunct);
+    const std::string indices_text = join_table_reader_debug_strings(
+            filter.global_indices,
+            [](GlobalIndex global_index) { return std::to_string(global_index.value()); });
+    std::ostringstream out;
+    out << "TableFilter{conjunct=" << conjunct_text;
+    out << ", global_indices=" << indices_text;
+    out << "}";
+    return out.str();
+}
+
+// Summarizes the per-column predicates as "{index:{predicate_count=N}, ...}". Only the number
+// of predicates per global index is printed.
+std::string table_column_predicates_debug_string(const TableColumnPredicates& predicates) {
+    std::ostringstream out;
+    out << "{";
+    const char* separator = "";
+    for (const auto& [global_index, column_predicates] : predicates) {
+        out << separator;
+        out << global_index.value() << ":{predicate_count=" << column_predicates.size() << "}";
+        separator = ", ";
+    }
+    out << "}";
+    return out.str();
+}
+
+// True when at least one conjunct has a non-null root that is a runtime-filter wrapper. Null
+// conjuncts and conjuncts without a root are ignored.
+bool contains_runtime_filter(const VExprContextSPtrs& conjuncts) {
+    for (const auto& conjunct : conjuncts) {
+        if (conjunct == nullptr) {
+            continue;
+        }
+        const auto& root = conjunct->root();
+        if (root != nullptr && root->is_rf_wrapper()) {
+            return true;
+        }
+    }
+    return false;
+}
+
+// Recursively gathers the GlobalIndex of every slot reference reachable from `expr` into
+// `global_indices`. A null `expr` contributes nothing.
+void collect_global_indices(const VExprSPtr& expr, std::set<GlobalIndex>* global_indices) {
+    if (!expr) {
+        return;
+    }
+    // A RuntimeFilterExpr wrapper may carry a thrift node that looks like SLOT_REF even though
+    // it wraps a real predicate. Descend into the wrapped predicate only, and never cast the
+    // wrapper itself to VSlotRef.
+    if (expr->is_rf_wrapper()) {
+        collect_global_indices(expr->get_impl(), global_indices);
+        return;
+    }
+    if (expr->is_slot_ref()) {
+        const auto* slot_ref = assert_cast<const VSlotRef*>(expr.get());
+        DORIS_CHECK(slot_ref->column_id() >= 0);
+        const GlobalIndex slot_index(cast_set<size_t>(slot_ref->column_id()));
+        global_indices->insert(slot_index);
+    }
+    std::ranges::for_each(expr->children(), [global_indices](const auto& child) {
+        collect_global_indices(child, global_indices);
+    });
+}
+
+// Appends a TableFilter for `conjunct` to `table_filters` when the conjunct references at least
+// one slot. The filter holds a clone of the conjunct's expression tree (clone_table_expr_tree)
+// and the referenced global indices in std::set iteration order. Null conjuncts and conjuncts
+// without slot references append nothing. `state` is not used by this function.
+Status build_table_filters_from_conjunct(const VExprContextSPtr& conjunct, RuntimeState* state,
+                                         std::vector<TableFilter>* table_filters) {
+    if (conjunct == nullptr) {
+        return Status::OK();
+    }
+    std::set<GlobalIndex> global_indices;
+    collect_global_indices(conjunct->root(), &global_indices);
+    if (global_indices.empty()) {
+        return Status::OK();
+    }
+
+    VExprSPtr filter_root;
+    RETURN_IF_ERROR(clone_table_expr_tree(conjunct->root(), &filter_root));
+    TableFilter table_filter;
+    table_filter.conjunct = VExprContext::create_shared(std::move(filter_root));
+    table_filter.global_indices.insert(table_filter.global_indices.end(), global_indices.begin(),
+                                       global_indices.end());
+    table_filters->push_back(std::move(table_filter));
+    return Status::OK();
+}
+
+// Decodes one serialized deletion vector and appends the deleted row positions to
+// `delete_rows`.
+//
+// Expected layout of `buf` (`buffer_size` bytes):
+//   [4-byte big-endian length][4-byte magic][roaring bitmap][4-byte checksum, ICEBERG only]
+// The length field must equal buffer_size minus the length field and the checksum. PAIMON
+// payloads are read as a 32-bit roaring bitmap, ICEBERG payloads as a 64-bit one. The checksum
+// bytes are skipped; this function does not verify them.
+//
+// `buf` and `delete_rows` must be non-null and `format` must be PAIMON or ICEBERG. Returns
+// DataQualityError when the buffer is too small, the length or magic does not match, or the
+// bitmap cannot be decoded.
+Status parse_deletion_vector(const char* buf, size_t buffer_size, DeleteFileDesc::Format format,
+                             DeleteRows* delete_rows) {
+    DORIS_CHECK(buf != nullptr);
+    DORIS_CHECK(delete_rows != nullptr);
+    DORIS_CHECK(format == DeleteFileDesc::Format::PAIMON ||
+                format == DeleteFileDesc::Format::ICEBERG);
+
+    constexpr size_t LENGTH_FIELD_SIZE = 4;
+    constexpr size_t MAGIC_SIZE = 4;
+    constexpr size_t HEADER_SIZE = LENGTH_FIELD_SIZE + MAGIC_SIZE;
+    const size_t checksum_size = format == DeleteFileDesc::Format::ICEBERG ? 4 : 0;
+    if (buffer_size < HEADER_SIZE + checksum_size) [[unlikely]] {
+        return Status::DataQualityError("Deletion vector file size too small: {}", buffer_size);
+    }
+
+    // Widen the 32-bit length before adding to it: the sum would otherwise wrap for lengths
+    // close to UINT32_MAX and the error below would report a bogus expected size.
+    const size_t expected_size =
+            static_cast<size_t>(BigEndian::Load32(buf)) + LENGTH_FIELD_SIZE + checksum_size;
+    if (expected_size != buffer_size) [[unlikely]] {
+        return Status::DataQualityError("Deletion vector length mismatch, expected: {}, actual: {}",
+                                        expected_size, buffer_size);
+    }
+
+    const char* magic_buf = buf + LENGTH_FIELD_SIZE;
+    const char* bitmap_buf = buf + HEADER_SIZE;
+    const size_t bitmap_size = buffer_size - HEADER_SIZE - checksum_size;
+    if (format == DeleteFileDesc::Format::PAIMON) {
+        // Paimon BitmapDeletionVector stores:
+        //   [4-byte big-endian length][4-byte magic 0x5E43F2D0][32-bit roaring bitmap]
+        // The length covers magic + bitmap, and does not include the leading length field.
+        constexpr static char PAIMON_BITMAP_MAGIC[] = {'\x5E', '\x43', '\xF2', '\xD0'};
+        if (memcmp(magic_buf, PAIMON_BITMAP_MAGIC, MAGIC_SIZE) != 0) [[unlikely]] {
+            return Status::DataQualityError(
+                    "Paimon deletion vector magic number mismatch, expected: {}, actual: {}",
+                    BigEndian::Load32(PAIMON_BITMAP_MAGIC), BigEndian::Load32(magic_buf));
+        }
+
+        roaring::Roaring bitmap;
+        try {
+            bitmap = roaring::Roaring::readSafe(bitmap_buf, bitmap_size);
+        } catch (const std::runtime_error& e) {
+            return Status::DataQualityError("Decode roaring bitmap failed, {}", e.what());
+        }
+
+        delete_rows->reserve(bitmap.cardinality());
+        for (auto it = bitmap.begin(); it != bitmap.end(); ++it) {
+            delete_rows->push_back(*it);
+        }
+        return Status::OK();
+    }
+
+    // ICEBERG: same header shape with its own magic, followed by a 64-bit roaring bitmap.
+    constexpr static char ICEBERG_DV_MAGIC[] = {'\xD1', '\xD3', '\x39', '\x64'};
+    if (memcmp(magic_buf, ICEBERG_DV_MAGIC, MAGIC_SIZE) != 0) [[unlikely]] {
+        return Status::DataQualityError(
+                "Iceberg deletion vector magic number mismatch, expected: {}, actual: {}",
+                BigEndian::Load32(ICEBERG_DV_MAGIC), BigEndian::Load32(magic_buf));
+    }
+
+    roaring::Roaring64Map bitmap;
+    try {
+        bitmap = roaring::Roaring64Map::readSafe(bitmap_buf, bitmap_size);
+    } catch (const std::runtime_error& e) {
+        return Status::DataQualityError("Decode roaring bitmap failed, {}", e.what());
+    }
+
+    delete_rows->reserve(bitmap.cardinality());
+    for (auto it = bitmap.begin(); it != bitmap.end(); ++it) {
+        // Positions are stored unsigned; cast_set converts them to the signed row type.
+        delete_rows->push_back(cast_set<int64_t>(*it));
+    }
+    return Status::OK();
+}
+
+} // namespace
+
+// Builds the FileSystemProperties for a scan. Without scan params, or when file_type is not
+// set, only system_type is filled in (FILE_LOCAL). Otherwise file_type, properties and
+// hdfs_params are copied, plus the broker addresses when they are set.
+std::shared_ptr<io::FileSystemProperties> create_system_properties(
+        const TFileScanRangeParams* scan_params) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    const bool has_file_type = scan_params != nullptr && scan_params->__isset.file_type;
+    if (has_file_type) {
+        system_properties->system_type = scan_params->file_type;
+        system_properties->properties = scan_params->properties;
+        system_properties->hdfs_params = scan_params->hdfs_params;
+        if (scan_params->__isset.broker_addresses) {
+            const auto& brokers = scan_params->broker_addresses;
+            system_properties->broker_addresses.assign(brokers.begin(), brokers.end());
+        }
+    } else {
+        system_properties->system_type = TFileType::FILE_LOCAL;
+    }
+    return system_properties;
+}
+
+// Produces a single-line diagnostic dump of the reader: file format, aggregate pushdown state,
+// presence flags for the optional collaborators, the current file, mapper options, projected
+// columns, partition keys, filters, column predicates, conjuncts, and the current data
+// reader's file schema, block layout and column mapper.
+//
+// Optional pointers are never dereferenced blindly: they are reported as has_* flags, as
+// "null", or with a fallback value (delete_row_count=0, system_type=FILE_LOCAL).
+std::string TableReader::debug_string() const {
+    std::ostringstream out;
+    out << "TableReader{format=" << file_format_to_string(_format)
+        << ", push_down_agg_type=" << push_down_agg_to_string(_push_down_agg_type)
+        << ", aggregate_pushdown_tried=" << _aggregate_pushdown_tried
+        << ", has_current_reader=" << (_data_reader.reader != nullptr)
+        << ", has_current_task=" << (_current_task != nullptr)
+        << ", current_file=" << current_file_debug_string(_current_task)
+        << ", has_delete_rows=" << (_delete_rows != nullptr)
+        << ", delete_row_count=" << (_delete_rows == nullptr ? 0 : _delete_rows->size())
+        << ", has_system_properties=" << (_system_properties != nullptr) << ", system_type="
+        << (_system_properties == nullptr ? static_cast<int>(TFileType::FILE_LOCAL)
+                                          : static_cast<int>(_system_properties->system_type))
+        << ", has_scan_params=" << (_scan_params != nullptr)
+        << ", has_io_ctx=" << (_io_ctx != nullptr)
+        << ", has_runtime_state=" << (_runtime_state != nullptr)
+        << ", has_scanner_profile=" << (_scanner_profile != nullptr)
+        << ", mapper_options=" << _mapper_options.debug_string() << ", projected_columns="
+        << join_table_reader_debug_strings(
+                   _projected_columns,
+                   [](const ColumnDefinition& column) { return column.debug_string(); })
+        << ", partition_values=" << partition_values_debug_string(_partition_values)
+        << ", table_filters="
+        << join_table_reader_debug_strings(
+                   _table_filters,
+                   [](const TableFilter& filter) { return table_filter_debug_string(filter); })
+        << ", table_column_predicates="
+        << table_column_predicates_debug_string(_table_column_predicates)
+        << ", conjunct_count=" << _conjuncts.size() << ", conjuncts="
+        << join_table_reader_debug_strings(_conjuncts,
+                                           [](const VExprContextSPtr& conjunct) {
+                                               return expr_context_debug_string(conjunct);
+                                           })
+        << ", file_schema="
+        << join_table_reader_debug_strings(
+                   _data_reader.file_schema,
+                   [](const ColumnDefinition& field) { return field.debug_string(); })
+        << ", file_block_layout="
+        << join_table_reader_debug_strings(
+                   _data_reader.file_block_layout,
+                   [](const FileBlockColumn& column) {
+                       // A layout column without a type is printed as type=null.
+                       std::ostringstream column_out;
+                       column_out << "FileBlockColumn{file_column_id=" << column.file_column_id
+                                  << ", name=" << column.name << ", type="
+                                  << (column.type == nullptr ? "null" : column.type->get_name())
+                                  << "}";
+                       return column_out.str();
+                   })
+        << ", block_template_columns=" << _data_reader.block_template.columns()
+        << ", column_mapper="
+        << (_data_reader.column_mapper == nullptr ? "null"
+                                                  : _data_reader.column_mapper->debug_string())
+        << "}";
+    return out.str();
+}
+
+// Copies identifier and name mapping from the external schema field matching `column`, if any.
+Status TableReader::annotate_projected_column(const TFileScanSlotInfo& slot_info,
+                                              ProjectedColumnBuildContext* context,
+                                              ColumnDefinition* column) const {
+    (void)slot_info;
+    DORIS_CHECK(context != nullptr);
+    DORIS_CHECK(column != nullptr);
+    auto& matched = context->schema_column;
+    matched.reset(); // always cleared first, so a miss leaves the context without a column
+    if (const auto* field = find_external_root_field(context->scan_params, *column)) {
+        matched = build_schema_column_from_external_field(*field, column->type);
+        column->identifier = matched->identifier;
+        column->name_mapping = matched->name_mapping;
+    }
+    return Status::OK();
+}
+
+// Stores the scan-wide options and, when a scanner profile is given, registers the counters.
+Status TableReader::init(TableReadOptions&& options) {
+    _scan_params = options.scan_params;
+    _format = options.format;
+    _io_ctx = options.io_ctx;
+    _runtime_state = options.runtime_state;
+    _scanner_profile = options.scanner_profile;
+    _file_slot_descs = options.file_slot_descs;
+    _push_down_agg_type = options.push_down_agg_type;
+    _condition_cache_digest = options.condition_cache_digest;
+    _system_properties = create_system_properties(_scan_params);
+    _mapper_options.mode = TableColumnMappingMode::BY_NAME;
+    // These TableReadOptions members are declared const, so std::move below still copies them.
+    _projected_columns = std::move(options.projected_columns);
+    _conjuncts = std::move(options.conjuncts);
+    _table_column_predicates = std::move(options.column_predicates);
+    if (_scanner_profile == nullptr) {
+        return Status::OK();
+    }
+    static const char* table_profile = "TableReader";
+    const auto add_counter = [this](const char* counter_name) {
+        return ADD_CHILD_COUNTER_WITH_LEVEL(_scanner_profile, counter_name, TUnit::UNIT,
+                                            table_profile, 1);
+    };
+    const auto add_timer = [this](const char* timer_name) {
+        return ADD_CHILD_TIMER_WITH_LEVEL(_scanner_profile, timer_name, table_profile, 1);
+    };
+    ADD_TIMER_WITH_LEVEL(_scanner_profile, table_profile, 1);
+    _profile.num_delete_files = add_counter("NumDeleteFiles");
+    _profile.num_delete_rows = add_counter("NumDeleteRows");
+    _profile.parse_delete_file_time = add_timer("ParseDeleteFileTime");
+    _profile.exec_timer = add_timer("GetBlockTime");
+    _profile.prepare_split_timer = add_timer("PrepareSplitTime");
+    _profile.finalize_timer = add_timer("FinalizeBlockTime");
+    _profile.create_reader_timer = add_timer("CreateReaderTime");
+    _profile.pushdown_agg_timer = add_timer("PushDownAggTime");
+    _profile.open_reader_timer = add_timer("OpenReaderTime");
+    return Status::OK();
+}
+
+// Rebuilds `_table_filters` from the scan conjuncts, stopping at the first conversion error.
+Status TableReader::_build_table_filters_from_conjuncts() {
+    _table_filters.clear();
+    for (const auto& ctx : _conjuncts) {
+        RETURN_IF_ERROR(build_table_filters_from_conjunct(ctx, _runtime_state, &_table_filters));
+    }
+    return Status::OK();
+}
+
+Status TableReader::_open_local_filter_exprs(const FileScanRequest& file_request) {
+    RowDescriptor row_desc; // default-constructed; shared by every prepare() call below
+    const auto prepare_and_open = [&](const auto& expr_contexts) -> Status {
+        for (const auto& expr_context : expr_contexts) {
+            RETURN_IF_ERROR(expr_context->prepare(_runtime_state, row_desc));
+            RETURN_IF_ERROR(expr_context->open(_runtime_state));
+        }
+        return Status::OK();
+    };
+    RETURN_IF_ERROR(prepare_and_open(file_request.conjuncts)); // file-local row filters first
+    return prepare_and_open(file_request.delete_conjuncts);    // then the delete conjuncts
+}
+
+bool TableReader::_should_enable_condition_cache(const FileScanRequest& file_request) const {
+    // Needs a non-zero predicate digest, no COUNT pushdown, and an open reader on a known file.
+    const bool has_cache_inputs = _condition_cache_digest != 0 &&
+                                  _push_down_agg_type != TPushAggOp::type::COUNT &&
+                                  _current_file_description.has_value() &&
+                                  _data_reader.reader != nullptr;
+    // The cache is filled by file readers while they evaluate file-local row-level conjuncts.
+    // ColumnPredicate-only scans prune row groups/pages but never yield the per-row survivor
+    // bitmap needed to populate it.
+    const bool has_row_level_conjuncts = !file_request.conjuncts.empty();
+    // Delete files/deletion vectors are table-format state that can change independently of the
+    // data file path/mtime/size in the external cache key, so a cached result could go stale.
+    // Delete filtering stays enabled; only the condition cache is bypassed.
+    const bool has_deletes = _delete_rows != nullptr || !file_request.delete_conjuncts.empty();
+    if (!has_cache_inputs || !has_row_level_conjuncts || has_deletes) {
+        return false;
+    }
+    // Runtime filters may arrive late and are not guaranteed to be covered by the scan-local
+    // digest. Without a read-only mode, a MISS could store a bitmap for P AND RF under the digest
+    // of P alone. This mirrors the old FileScanner guard.
+    return !contains_runtime_filter(file_request.conjuncts);
+}
+
+// Looks up (HIT) or allocates (MISS) the per-granule filter bitmap for the current file range
+// and hands it to the open file reader. The cache stays disabled when it cannot be used.
+Status TableReader::_init_reader_condition_cache(const FileScanRequest& file_request) {
+    _condition_cache = nullptr;
+    _condition_cache_ctx = nullptr;
+    // Bail out unless the scan is eligible and a cache instance exists.
+    auto* cache = _should_enable_condition_cache(file_request)
+                          ? segment_v2::ConditionCache::instance()
+                          : nullptr;
+    if (cache == nullptr) {
+        return Status::OK();
+    }
+
+    const auto& file = *_current_file_description;
+    _condition_cache_key = segment_v2::ConditionCache::ExternalCacheKey(
+            file.path, file.mtime, file.file_size, _condition_cache_digest, file.range_start_offset,
+            file.range_size);
+
+    segment_v2::ConditionCacheHandle handle;
+    const bool is_hit = cache->lookup(_condition_cache_key, &handle);
+    if (is_hit) {
+        ++_condition_cache_hit_count;
+        _condition_cache = handle.get_filter_result();
+    } else if (const int64_t total_rows = _data_reader.reader->get_total_rows(); total_rows > 0) {
+        // One extra guard granule covers split ranges that start mid-granule: its false bit lies
+        // beyond the real range and never overlaps real rows, but it avoids boundary overflow
+        // when a reader marks the last partial granule.
+        const size_t num_granules = (total_rows + ConditionCacheContext::GRANULE_SIZE - 1) /
+                                    ConditionCacheContext::GRANULE_SIZE;
+        _condition_cache = std::make_shared<std::vector<bool>>(num_granules + 1, false);
+    }
+    if (_condition_cache == nullptr) {
+        return Status::OK(); // nothing to attach: no bitmap on HIT, or no rows on MISS
+    }
+
+    // Share the bitmap with the file reader through a context that records HIT vs MISS.
+    _condition_cache_ctx = std::make_shared<ConditionCacheContext>();
+    _condition_cache_ctx->is_hit = is_hit;
+    _condition_cache_ctx->filter_result = _condition_cache;
+    _data_reader.reader->set_condition_cache_context(_condition_cache_ctx);
+    return Status::OK();
+}
+
+// Publishes the bitmap built on a cache MISS, then drops this reader's condition cache state.
+// A bitmap obtained from a HIT came out of the cache and is not inserted again.
+void TableReader::_finalize_reader_condition_cache() {
+    // Nothing is published without a cache context or when the bitmap came from a HIT.
+    const bool has_context = _condition_cache_ctx != nullptr;
+    const bool built_on_miss = has_context && !_condition_cache_ctx->is_hit;
+    // LIMIT or scanner cancellation may close a reader before all selected row ranges are
+    // visited. Unvisited granules remain false in a MISS bitmap, so inserting a partial bitmap
+    // would make a later HIT skip valid rows. Only publish cache entries after the physical
+    // reader reaches EOF.
+    if (built_on_miss && _current_reader_reached_eof) {
+        segment_v2::ConditionCache::instance()->insert(_condition_cache_key,
+                                                       std::move(_condition_cache));
+    }
+
+    // Reset unconditionally so no stale cache state survives this reader.
+    _condition_cache = nullptr;
+    _condition_cache_ctx = nullptr;
+}
+
+// Creates, initializes and opens the file reader for the current task; `*eos` becomes true
+// when no task is left to read.
+Status TableReader::create_next_reader(bool* eos) {
+    SCOPED_TIMER(_profile.create_reader_timer);
+    DCHECK(_data_reader.reader == nullptr);
+    if (_current_task == nullptr) {
+        *eos = true;
+        return Status::OK();
+    }
+    auto& reader = _data_reader.reader;
+    RETURN_IF_ERROR(create_file_reader(&reader));
+    DORIS_CHECK(reader != nullptr);
+    if (_batch_size > 0) {
+        reader->set_batch_size(_batch_size);
+    }
+    RETURN_IF_ERROR(reader->init(_runtime_state));
+    RETURN_IF_ERROR(open_reader());
+    // open_reader() may close the reader again (e.g. constant filters pruned the split). Only
+    // then can this call end the scan, and only if the current task is gone as well.
+    *eos = reader == nullptr && _current_task == nullptr;
+    return Status::OK();
+}
+
+// Instantiates the reader matching `_format` for the current task's data file. Returns
+// InvalidArgument when CSV/TEXT/JSON lack file slot descriptors and NotSupported for any
+// other format.
+Status TableReader::create_file_reader(std::unique_ptr<FileReader>* reader) {
+    DORIS_CHECK(reader != nullptr);
+    // NOTE: `_current_task` is dereferenced unchecked below; create_next_reader() verifies it
+    // before calling this hook.
+    if (_format == FileFormat::PARQUET) {
+        // Timestamp-tz mapping is on only when the scan params explicitly set the flag.
+        const bool map_timestamp_tz = _scan_params != nullptr &&
+                                      _scan_params->__isset.enable_mapping_timestamp_tz &&
+                                      _scan_params->enable_mapping_timestamp_tz;
+        *reader = std::make_unique<format::parquet::ParquetReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+                _global_rowid_context, map_timestamp_tz);
+    } else if (_format == FileFormat::CSV) {
+        if (_file_slot_descs == nullptr) {
+            return Status::InvalidArgument("CSV reader requires file slot descriptors");
+        }
+        // CSV has no embedded schema. TableReader owns table-level mapping, while CsvReader needs
+        // only the physical file slots plus scan text parameters to build a file-local schema.
+        // Non-file columns such as partitions/defaults/virtual row ids are intentionally excluded
+        // from `_file_slot_descs` and are materialized during finalize_chunk().
+        *reader = std::make_unique<format::csv::CsvReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+                _scan_params, *_file_slot_descs, _current_range_compress_type,
+                _current_range_load_id);
+    } else if (_format == FileFormat::TEXT) {
+        if (_file_slot_descs == nullptr) {
+            return Status::InvalidArgument("Text reader requires file slot descriptors");
+        }
+        // Text files have no embedded schema. As with CSV, TableReader handles table-level mapping
+        // and only passes physical file slots to the v2 TextReader.
+        *reader = std::make_unique<format::text::TextReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+                _scan_params, *_file_slot_descs, _current_range_compress_type,
+                _current_range_load_id);
+    } else if (_format == FileFormat::JSON) {
+        if (_file_slot_descs == nullptr) {
+            return Status::InvalidArgument("JSON reader requires file slot descriptors");
+        }
+        // Unlike CSV/TEXT, JsonReader also receives the current file range descriptor.
+        *reader = std::make_unique<format::json::JsonReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile,
+                _scan_params, _current_file_range_desc, *_file_slot_descs,
+                _current_range_compress_type, _current_range_load_id);
+    } else if (_format == FileFormat::NATIVE) {
+        *reader = std::make_unique<format::native::NativeReader>(
+                _system_properties, _current_task->data_file, _io_ctx, _scanner_profile);
+    } else {
+        return Status::NotSupported("TableReader does not support file format {}",
+                                    file_format_to_string(_format));
+    }
+    return Status::OK();
+}
+
+std::unique_ptr<io::FileDescription> create_file_description(const TFileRangeDesc& range) {
+    auto desc = std::make_unique<io::FileDescription>();
+    desc->path = range.path;
+    if (range.__isset.fs_name) { // unset: FileDescription keeps its own default
+        desc->fs_name = range.fs_name;
+    }
+    if (range.__isset.file_cache_admission) { // unset: default kept as well
+        desc->file_cache_admission = range.file_cache_admission;
+    }
+    desc->file_size = range.__isset.file_size ? range.file_size : -1; // -1 if unset
+    desc->mtime = range.__isset.modification_time ? range.modification_time : 0; // 0 if unset
+    desc->range_start_offset = range.__isset.start_offset ? range.start_offset : 0; // 0 if unset
+    desc->range_size = range.__isset.size ? range.size : -1; // -1 if unset
+    return desc;
+}
+
+// Installs `options` as the current split: resets per-split state, records the data file and
+// range metadata, then parses delete predicates unless the table-level count path is active.
+Status TableReader::prepare_split(const SplitReadOptions& options) {
+    SCOPED_TIMER(_profile.prepare_split_timer);
+    const TFileRangeDesc& range = options.current_range;
+    // Update to current split format to handle ORC/PARQUET files in one table.
+    _format = options.current_split_format;
+    // `options` is const, so this is a copy; std::move here would only disguise it.
+    _partition_values = options.partition_values;
+    _current_task = std::make_unique<ScanTask>();
+    _current_task->data_file = create_file_description(range);
+    _current_file_description = *_current_task->data_file;
+    _current_file_range_desc = range;
+    _current_range_compress_type =
+            range.__isset.compress_type ? range.compress_type : TFileCompressType::UNKNOWN;
+    _current_range_load_id =
+            range.__isset.load_id ? std::make_optional(range.load_id) : std::nullopt;
+    _global_rowid_context = options.global_rowid_context;
+    _delete_rows = nullptr;
+    _aggregate_pushdown_tried = false;
+    _remaining_table_level_count = -1;
+    _current_reader_reached_eof = false;
+    if (_push_down_agg_type == TPushAggOp::type::COUNT && range.__isset.table_format_params &&
+        range.table_format_params.__isset.table_level_row_count) {
+        DORIS_CHECK(range.table_format_params.table_level_row_count >= -1);
+        _remaining_table_level_count = range.table_format_params.table_level_row_count;
+    }
+    if (_is_table_level_count_active()) {
+        return Status::OK();
+    }
+    return _parse_delete_predicates(options);
+}
+
+// Loads the deletion vector of the current split, if the table format reports one, into
+// `_delete_rows`. `options.cache->get()` is handed the loader lambda below to read and parse it.
+Status TableReader::_parse_delete_predicates(const SplitReadOptions& options) {
+    DeleteFileDesc desc {.fs_name = options.current_range.fs_name};
+    bool has_delete_file = false;
+    RETURN_IF_ERROR(_parse_deletion_vector_file(options.current_range.table_format_params, &desc,
+                                                &has_delete_file));
+    if (!has_delete_file) {
+        return Status::OK();
+    }
+    DORIS_CHECK(options.cache != nullptr);
+
+    Status create_status = Status::OK();
+    _delete_rows = options.cache->get<DeleteRows>(desc.key, [&]() -> DeleteRows* {
+        // Owned by a unique_ptr until parsing succeeds, so every early `return nullptr` frees
+        // the vector instead of leaking it.
+        auto delete_rows = std::make_unique<DeleteRows>();
+        DeletionVectorReader dv_reader(_runtime_state, _scanner_profile, *_scan_params, desc,
+                                       _io_ctx.get());
+        create_status = dv_reader.open();
+        if (!create_status.ok()) [[unlikely]] {
+            return nullptr;
+        }
+        size_t bytes_read = desc.size;
+        std::vector<char> buffer(bytes_read);
+        create_status = dv_reader.read_at(desc.start_offset, {buffer.data(), bytes_read});
+        if (!create_status.ok()) [[unlikely]] {
+            return nullptr;
+        }
+        const char* buf = buffer.data();
+        SCOPED_TIMER(_profile.parse_delete_file_time);
+        create_status = parse_deletion_vector(buf, bytes_read, desc.format, delete_rows.get());
+        if (!create_status.ok()) [[unlikely]] {
+            return nullptr;
+        }
+        COUNTER_UPDATE(_profile.num_delete_rows, delete_rows->size());
+        return delete_rows.release(); // hand the raw pointer to the cache, as before
+    });
+    return create_status;
+}
+} // namespace doris::format
diff --git a/be/src/format_v2/table_reader.h b/be/src/format_v2/table_reader.h
new file mode 100644
index 00000000000000..a557b22795ce0d
--- /dev/null
+++ b/be/src/format_v2/table_reader.h
@@ -0,0 +1,1565 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <bvar/status.h>
+
+#include <algorithm>
+#include <exception>
+#include <map>
+#include <memory>
+#include <optional>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "common/cast_set.h"
+#include "common/exception.h"
+#include "common/logging.h"
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_array.h"
+#include "core/column/column_const.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/field.h"
+#include "exec/common/stringop_substring.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vexpr_fwd.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/column_data.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/expr/cast.h"
+#include "format_v2/expr/delete_predicate.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/schema_projection.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/descriptors.h"
+#include "storage/segment/condition_cache.h"
+
+namespace doris {
+class Block;
+class ColumnPredicate;
+struct DeleteFileDesc;
+class RuntimeState;
+} // namespace doris
+
+namespace doris::format {
+
+using DeleteRows = std::vector<int64_t>;
+
+// A row-level predicate on the table/global schema. Such predicates are rewritten to file-local
+// expressions when possible, and remain the source of row-level filtering after localization.
+struct TableFilter {
+    VExprContextSPtr conjunct;
+    std::vector<GlobalIndex> global_indices; // presumably the columns it reads — TODO confirm
+};
+
+struct ScanTask {
+    virtual ~ScanTask() = default;
+    // Description of the data file to read; TableReader::prepare_split() fills it in.
+    std::unique_ptr<io::FileDescription> data_file;
+};
+
+struct ProjectedColumnBuildContext { // passed to annotate/validate_projected_column(s)
+    const TFileScanRangeParams* scan_params = nullptr; // searched for the external root field
+    const TFileRangeDesc* range = nullptr;
+    RuntimeState* runtime_state = nullptr;
+    std::optional<ColumnDefinition> schema_column = std::nullopt; // reset/set per annotated column
+    size_t next_file_column_idx = 0;
+};
+
+struct ReadProfile { // Set by TableReader::init(); all stay null without a scanner profile.
+    RuntimeProfile::Counter* num_delete_files = nullptr; // "NumDeleteFiles"
+    RuntimeProfile::Counter* num_delete_rows = nullptr; // "NumDeleteRows"
+    RuntimeProfile::Counter* parse_delete_file_time = nullptr; // "ParseDeleteFileTime"
+    RuntimeProfile::Counter* exec_timer = nullptr; // "GetBlockTime"
+    RuntimeProfile::Counter* prepare_split_timer = nullptr; // "PrepareSplitTime"
+    RuntimeProfile::Counter* finalize_timer = nullptr; // "FinalizeBlockTime"
+    RuntimeProfile::Counter* create_reader_timer = nullptr; // "CreateReaderTime"
+    RuntimeProfile::Counter* pushdown_agg_timer = nullptr; // "PushDownAggTime"
+    RuntimeProfile::Counter* open_reader_timer = nullptr; // "OpenReaderTime"
+};
+
+struct TableReadOptions {
+    // Columns to read from the file and output by the table reader, in table/global schema
+    // semantics. NOTE: const members are copied, not moved, by TableReader::init().
+    const std::vector<ColumnDefinition> projected_columns;
+    // Simple single-column predicates, parsed by the scan operator.
+    const TableColumnPredicates column_predicates;
+    // All complex conjuncts from the scan operator.
+    const VExprContextSPtrs conjuncts;
+    // File format of the underlying data files, needed for reader initialization and reader-level
+    // filter pushdown.
+    const FileFormat format;
+    TFileScanRangeParams* scan_params;
+    std::shared_ptr<io::IOContext> io_ctx;
+    RuntimeState* runtime_state;
+    RuntimeProfile* scanner_profile; // may be null: init() then registers no counters
+    // File formats without complete self-describing metadata, such as CSV, Text, and JSON, need
+    // the FE-planned physical file slots to build their file-local schema and deserialize values.
+    const std::vector<SlotDescriptor*>* file_slot_descs = nullptr;
+    // Push-down aggregate type.
+    const TPushAggOp::type push_down_agg_type = TPushAggOp::type::NONE;
+    // Digest of stable pushed-down predicates. A zero digest disables condition cache.
+    uint64_t condition_cache_digest = 0;
+};
+
+struct SplitReadOptions {
+    // Split-level information for reader initialization, which may include file path, partition values, delete file info, etc. The content is table format specific and opaque to table reader base class; it's the responsibility of the concrete table reader implementation to parse necessary information for reader initialization and filter pushdown.
+    std::map<std::string, Field> partition_values;
+    ShardedKVCache* cache = nullptr; // required (DORIS_CHECKed) when the split has a delete file
+    TFileRangeDesc current_range;
+    FileFormat current_split_format = FileFormat::PARQUET;
+    std::optional<GlobalRowIdContext> global_rowid_context;
+};
+
+// Base class for table-level readers.
+// This layer owns common table-level orchestration, such as split iteration, dynamic partition
+// pruning, delete handling and conversion from file-local blocks to table-schema blocks. Concrete
+// table-format readers only need to provide format-specific hooks for opening readers and parsing
+// split metadata.
+class TableReader {
+public:
+    virtual ~TableReader() = default;
+
+    // Initialize common runtime options for the table reader. Subclasses may call this from their
+    // own init(options); table-format schema and split metadata are provided later per split.
+    virtual Status init(TableReadOptions&& options);
+
+    // FileScannerV2 calls this before each get_block() with an adaptive bytes-per-row estimate.
+    // The value (clamped to at least 1) is stored as well as forwarded to the open reader, so
+    // readers opened for later splits start with the latest predicted batch size.
+    void set_batch_size(size_t batch_size) {
+        _batch_size = batch_size == 0 ? 1 : batch_size;
+        if (_data_reader.reader) {
+            _data_reader.reader->set_batch_size(_batch_size);
+        }
+    }
+
+    // Prepare for reading a new split/task.
+    // 1. Pass a new split/task to reader, which will be used in subsequent open_reader() to initialize the underlying file reader.
+    // 2. Parse delete predicates from split/task information, which will be used for later dynamic filtering and delete handling.
+    virtual Status prepare_split(const SplitReadOptions& options);
+
+    // Public entry point for reading a table-schema block. The base class opens the current reader,
+    // skips empty batches, and closes readers that reach EOF. `*eos` is read on entry as well: a
+    // call with `*eos == true` returns immediately. Subclasses customize behavior through hooks.
+    virtual Status get_block(Block* block, bool* eos) {
+        SCOPED_TIMER(_profile.exec_timer);
+        DORIS_CHECK(block->columns() == _projected_columns.size());
+        block->clear_column_data(_projected_columns.size()); // reset the output block's column data
+
+        while (true) {
+            if (*eos) {
+                return Status::OK();
+            }
+            if (!_data_reader.reader) { // no open reader: serve the count shortcut or open one
+                if (_is_table_level_count_active()) {
+                    RETURN_IF_ERROR(_read_table_level_count(block, eos));
+                    return Status::OK();
+                }
+                RETURN_IF_ERROR(create_next_reader(eos));
+                if (!_data_reader.reader) {
+                    DCHECK(*eos);
+                    return Status::OK();
+                }
+            }
+
+            // Materialize a reduced row set for upper aggregate operators when aggregate
+            // pushdown can be applied. This is not the final aggregate result: COUNT emits
+            // `count` default rows for the upper COUNT(*), and MIN/MAX emits two rows containing
+            // file-level min/max values for the upper MIN/MAX.
+            if (!_aggregate_pushdown_tried) {
+                SCOPED_TIMER(_profile.pushdown_agg_timer);
+                bool pushed_down = false;
+                RETURN_IF_ERROR(_try_materialize_aggregate_pushdown_rows(block, &pushed_down));
+                if (pushed_down) {
+                    return Status::OK();
+                }
+            }
+
+            bool current_eof = false;
+            _data_reader.block_template.clear_column_data(
+                    cast_set<int64_t>(_data_reader.file_block_layout.size()));
+            size_t current_rows = 0;
+            RETURN_IF_ERROR(_data_reader.reader->get_block(&_data_reader.block_template,
+                                                           &current_rows, &current_eof));
+            if (current_rows == 0) {
+                if (current_eof) {
+                    _current_reader_reached_eof = true;
+                    RETURN_IF_ERROR(close_current_reader());
+                }
+                continue; // empty batch: loop to read again (or to open the next reader)
+            }
+            DCHECK_EQ(_data_reader.block_template.columns(), _data_reader.file_block_layout.size())
+                    << _data_reader.block_template.dump_structure();
+#ifndef NDEBUG
+            RETURN_IF_ERROR(_check_file_block_columns("after file reader get_block", current_rows));
+#endif
+            DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size());
+            RETURN_IF_ERROR(finalize_chunk(block, current_rows)); // file block -> table-schema block
+#ifndef NDEBUG
+            RETURN_IF_ERROR(
+                    _check_table_block_columns("after finalize_chunk", block, current_rows));
+#endif
+            if (current_eof) { // last batch of this reader: close it after returning its rows
+                _current_reader_reached_eof = true;
+                RETURN_IF_ERROR(close_current_reader());
+            }
+            return Status::OK();
+        }
+    }
+
+    // Closes the active file reader, if any, and forgets the current task and file. Subclasses
+    // with extra table-format resources should override this and call TableReader::close() first.
+    virtual Status close() {
+        if (_data_reader.reader != nullptr) {
+            RETURN_IF_ERROR(close_current_reader());
+        }
+        _current_task = nullptr;
+        _current_file_description = std::nullopt;
+        _remaining_table_level_count = -1;
+        return Status::OK();
+    }
+
+    int64_t condition_cache_hit_count() const { return _condition_cache_hit_count; } // HIT count
+
+    virtual std::string debug_string() const;
+
+    virtual Status annotate_projected_column(const TFileScanSlotInfo& slot_info,
+                                             ProjectedColumnBuildContext* context,
+                                             ColumnDefinition* column) const;
+
+    virtual Status validate_projected_columns(const ProjectedColumnBuildContext& context) const {
+        (void)context; // unused: the base implementation performs no validation
+        return Status::OK();
+    }
+
+protected:
+    // Hook: parse deletion vector information from the table-format-specific file description.
+    virtual Status _parse_deletion_vector_file(const TTableFormatFileDesc& t_desc,
+                                               DeleteFileDesc* desc, bool* has_delete_file) {
+        *has_delete_file = false; // base implementation: the split never has a delete file
+        return Status::OK();
+    }
+
+    // Creates, initializes and opens the reader for the current task. Any previous reader must
+    // already be closed (DCHECKed in the implementation). Subclasses should not duplicate this.
+    Status create_next_reader(bool* eos);
+    virtual Status create_file_reader(std::unique_ptr<FileReader>* reader);
+    virtual TableColumnMappingMode mapping_mode() const { return TableColumnMappingMode::BY_NAME; }
+    virtual Status annotate_file_schema(std::vector<ColumnDefinition>* file_schema) {
+        DORIS_CHECK(file_schema != nullptr);
+        return Status::OK(); // base implementation leaves the file schema unchanged
+    }
+
+    // Open the concrete reader for the current split/task and build the file-local scan request.
+    //
+    // When a constant filter proves that the whole split can be skipped, the current reader is
+    // closed and OK is returned without opening the file reader.
+    virtual Status open_reader() {
+        SCOPED_TIMER(_profile.open_reader_timer);
+
+        // Step 1: fetch the file schema and create the column mapping.
+        // For Paimon/Hudi, FE can provide field ids through `history_schema_info`, so the file
+        // schema is annotated before column mapping in case the table format maps by field id.
+        std::vector<ColumnDefinition> schema_from_file;
+        RETURN_IF_ERROR(_data_reader.reader->get_schema(&schema_from_file));
+        RETURN_IF_ERROR(annotate_file_schema(&schema_from_file));
+        _data_reader.file_schema = schema_from_file;
+        _mapper_options.mode = mapping_mode();
+
+        _data_reader.column_mapper = _data_reader.reader->create_column_mapper(_mapper_options);
+        DORIS_CHECK(_data_reader.column_mapper != nullptr);
+        RETURN_IF_ERROR(_data_reader.column_mapper->create_mapping(
+                _projected_columns, _partition_values, schema_from_file));
+        DORIS_CHECK(_data_reader.column_mapper->mappings().size() == _projected_columns.size());
+
+        // Step 2: build table filters based on conjuncts and column predicates.
+        RETURN_IF_ERROR(_build_table_filters_from_conjuncts());
+
+        // Step 3: create the file scan request from the column mapping and table filters. The
+        // request carries row-level expression filters and file-level pruning hints. Only
+        // expression filters decide returned rows; column predicates are pruning hints.
+        auto scan_request = std::make_shared<FileScanRequest>();
+        RETURN_IF_ERROR(_data_reader.column_mapper->create_scan_request(
+                _table_filters, _table_column_predicates, _projected_columns, scan_request.get(),
+                _runtime_state));
+        bool split_pruned = false;
+        RETURN_IF_ERROR(_evaluate_constant_filters(&split_pruned));
+        if (split_pruned) {
+            RETURN_IF_ERROR(close_current_reader());
+            return Status::OK();
+        }
+        RETURN_IF_ERROR(customize_file_scan_request(scan_request.get()));
+        RETURN_IF_ERROR(_open_local_filter_exprs(*scan_request));
+
+        auto& layout = _data_reader.file_block_layout;
+        layout.clear();
+        _data_reader.block_template.clear();
+        layout.resize(scan_request->local_positions.size());
+
+        // Projects `field` into `projected` when `column_id` is listed in `columns`, and leaves
+        // `projected` untouched otherwise.
+        const auto project_if_listed = [&](const auto& columns, const auto& column_id,
+                                           const auto& field,
+                                           ColumnDefinition* projected) -> Status {
+            const auto listed = std::find_if(columns.begin(), columns.end(),
+                                             [&](const LocalColumnIndex& candidate) {
+                                                 return candidate.column_id() == column_id;
+                                             });
+            if (listed != columns.end()) {
+                RETURN_IF_ERROR(project_column_definition(field, *listed, projected));
+            }
+            return Status::OK();
+        };
+
+        // Step 4: build the file block layout from the file schema and the scan request. The
+        // layout describes the block returned by file reader before table-column materialization.
+        for (const auto& [file_column_id, block_position] : scan_request->local_positions) {
+            const auto slot = block_position.value();
+            DORIS_CHECK(slot < layout.size());
+            const auto* field = _find_column_definition(_data_reader.file_schema, file_column_id);
+            DORIS_CHECK(field != nullptr);
+
+            // Both lookups write into the same definition: non-predicate columns first, then
+            // predicate columns.
+            ColumnDefinition projected_field;
+            RETURN_IF_ERROR(project_if_listed(scan_request->non_predicate_columns, file_column_id,
+                                              *field, &projected_field));
+            RETURN_IF_ERROR(project_if_listed(scan_request->predicate_columns, file_column_id,
+                                              *field, &projected_field));
+            layout[slot] = {
+                    .file_column_id = file_column_id,
+                    .name = projected_field.name,
+                    .type = projected_field.type,
+            };
+            // A column listed in neither set leaves the type unset and is rejected here.
+            DORIS_CHECK(layout[slot].type != nullptr);
+        }
+
+        // Step 5: prepare the block template from the layout. The template stores the block
+        // returned by file reader before table-column materialization.
+        _data_reader.block_template.reserve(layout.size());
+        for (const auto& layout_column : layout) {
+            _data_reader.block_template.insert({layout_column.type->create_column(),
+                                                layout_column.type, layout_column.name});
+        }
+        if (VLOG_DEBUG_IS_ON) {
+            VLOG_DEBUG << "TableReader debug: " << debug_string();
+        }
+        RETURN_IF_ERROR(_open_mapping_exprs());
+        RETURN_IF_ERROR(_data_reader.reader->open(scan_request));
+        RETURN_IF_ERROR(_init_reader_condition_cache(*scan_request));
+        return Status::OK();
+    }
+
+    // Called by open_reader() after the column mapping exists and before the scan request is
+    // created (the "build table filters" step). Defined outside this chunk.
+    Status _build_table_filters_from_conjuncts();
+    // Called by open_reader() with the customized scan request, before the file block layout is
+    // built. Defined outside this chunk.
+    Status _open_local_filter_exprs(const FileScanRequest& file_request);
+    // Called by open_reader() right after the file reader was opened with the scan request.
+    Status _init_reader_condition_cache(const FileScanRequest& file_request);
+    // Called as the first step of close_current_reader(), before the file reader is closed.
+    void _finalize_reader_condition_cache();
+    // NOTE(review): not called in this chunk; presumably decides whether the condition cache
+    // applies to `file_request` - confirm against the definition.
+    bool _should_enable_condition_cache(const FileScanRequest& file_request) const;
+
+    // Check whether a table filter that only references constant entries rejects the split.
+    // Each eligible filter is executed on a one-row block; `*can_filter_all` becomes true as soon
+    // as one of them filters that row out, and stays false otherwise.
+    Status _evaluate_constant_filters(bool* can_filter_all) {
+        DORIS_CHECK(can_filter_all != nullptr);
+        *can_filter_all = false;
+        for (const auto& candidate : _table_filters) {
+            if (candidate.conjunct == nullptr) {
+                continue;
+            }
+            // RuntimeFilterExpr does not implement execute_column_impl(); it is evaluated by
+            // the row-level filter path through execute_filter(). Constant split pruning uses
+            // VExprContext::execute() on a one-row synthetic block, so runtime filters must not
+            // be pre-executed here even when their referenced slot maps to a constant value.
+            if (candidate.conjunct->root()->is_rf_wrapper()) {
+                continue;
+            }
+            if (!_table_filter_has_only_constant_entries(candidate)) {
+                continue;
+            }
+
+            Block one_row_block;
+            RETURN_IF_ERROR(_build_constant_filter_block(candidate, &one_row_block));
+            RowDescriptor empty_row_desc;
+            RETURN_IF_ERROR(candidate.conjunct->prepare(_runtime_state, empty_row_desc));
+            RETURN_IF_ERROR(candidate.conjunct->open(_runtime_state));
+            int result_position = -1;
+            RETURN_IF_ERROR(candidate.conjunct->execute(&one_row_block, &result_position));
+            DORIS_CHECK(result_position >= 0);
+
+            const auto& verdict = one_row_block.get_by_position(result_position).column;
+            if (_filter_result_filters_all(verdict)) {
+                *can_filter_all = true;
+                break;
+            }
+        }
+        return Status::OK();
+    }
+
+    // True when the filter references at least one global index and every referenced index has
+    // a filter entry that is constant.
+    bool _table_filter_has_only_constant_entries(const TableFilter& table_filter) const {
+        const auto& entries = _data_reader.column_mapper->filter_entries();
+        const auto& referenced = table_filter.global_indices;
+        const bool all_constant =
+                std::all_of(referenced.begin(), referenced.end(), [&](const auto& index) {
+                    const auto found = entries.find(index);
+                    return found != entries.end() && found->second.is_constant();
+                });
+        // A filter that references no column at all is not treated as constant.
+        return all_constant && !referenced.empty();
+    }
+
+    // Fill `eval_block` with one column per column mapping, in mapping order. Columns that the
+    // filter references and that map to a constant entry get the materialized constant; all
+    // other positions get a default-valued const column of the table type.
+    Status _build_constant_filter_block(const TableFilter& table_filter, Block* eval_block) {
+        DORIS_CHECK(eval_block != nullptr);
+        eval_block->clear();
+        const auto& mappings = _data_reader.column_mapper->mappings();
+        const auto& filter_entries = _data_reader.column_mapper->filter_entries();
+        DORIS_CHECK(mappings.size() == _projected_columns.size());
+
+        const auto& referenced = table_filter.global_indices;
+        for (size_t position = 0; position < mappings.size(); ++position) {
+            const auto& mapping = mappings[position];
+            const auto global_index = GlobalIndex(position);
+            const auto entry = filter_entries.find(global_index);
+            const bool is_referenced = std::find(referenced.begin(), referenced.end(),
+                                                 global_index) != referenced.end();
+            const bool use_constant = is_referenced && entry != filter_entries.end() &&
+                                      entry->second.is_constant();
+            if (!use_constant) {
+                eval_block->insert({mapping.table_type->create_column_const_with_default_value(1),
+                                    mapping.table_type, mapping.table_column_name});
+                continue;
+            }
+            ColumnPtr constant_column;
+            RETURN_IF_ERROR(_materialize_constant_filter_column(entry->second.constant_index(),
+                                                                &constant_column));
+            eval_block->insert(
+                    {std::move(constant_column), mapping.table_type, mapping.table_column_name});
+        }
+        return Status::OK();
+    }
+
+    // Evaluate the constant expression stored under `constant_index` in the column mapper's
+    // constant map and return the resulting column through `column`. The result is checked to
+    // hold exactly one row.
+    Status _materialize_constant_filter_column(ConstantIndex constant_index, ColumnPtr* column) {
+        DORIS_CHECK(column != nullptr);
+        const auto& constant_entry = _data_reader.column_mapper->constant_map().get(constant_index);
+        DORIS_CHECK(constant_entry.expr != nullptr);
+        DORIS_CHECK(constant_entry.type != nullptr);
+        // The expression is prepared against a default-constructed row descriptor.
+        RowDescriptor row_desc;
+        RETURN_IF_ERROR(constant_entry.expr->prepare(_runtime_state, row_desc));
+        RETURN_IF_ERROR(constant_entry.expr->open(_runtime_state));
+        // Seed the evaluation block with one default-valued const column of the entry type.
+        // NOTE(review): presumably this only gives execute() a block with one row - confirm.
+        Block eval_block;
+        eval_block.insert({constant_entry.type->create_column_const_with_default_value(1),
+                           constant_entry.type, "__table_reader_constant_filter"});
+        int result_column_id = -1;
+        RETURN_IF_ERROR(constant_entry.expr->execute(&eval_block, &result_column_id));
+        DORIS_CHECK(result_column_id >= 0);
+        // The expression result is read back from the position reported by execute().
+        *column = eval_block.get_by_position(result_column_id).column;
+        DORIS_CHECK((*column)->size() == 1);
+        return Status::OK();
+    }
+
+    // Interpret a one-row filter result: returns true when its value is false, i.e. the row is
+    // filtered out.
+    static bool _filter_result_filters_all(const ColumnPtr& filter_column) {
+        DORIS_CHECK(filter_column.get() != nullptr);
+        DORIS_CHECK(filter_column->size() == 1);
+        const bool row_passes = filter_column->get_bool(0);
+        return !row_passes;
+    }
+
+    // Hook called by open_reader() on the file scan request before the request is used to open
+    // the file reader. The base implementation appends the delete predicate, if there is one.
+    virtual Status customize_file_scan_request(FileScanRequest* file_request) {
+        return _append_delete_predicate(file_request);
+    }
+
+    // True while `_remaining_table_level_count` is non-negative. _read_table_level_count() sets
+    // it back to -1 once the remaining count has reached zero.
+    bool _is_table_level_count_active() const { return _remaining_table_level_count >= 0; }
+
+    // Replace every column of `block` with a freshly created column of the same type that is
+    // resized to `rows` entries. A block without columns is only accepted when `rows` is 0.
+    Status _materialize_count_rows(size_t rows, Block* block) const {
+        DORIS_CHECK(block != nullptr);
+        const auto column_count = block->columns();
+        DORIS_CHECK(column_count > 0 || rows == 0);
+        for (size_t position = 0; position < column_count; ++position) {
+            auto fresh_column = block->get_by_position(position).type->create_column();
+            fresh_column->resize(rows);
+            block->replace_by_position(position, std::move(fresh_column));
+        }
+        return Status::OK();
+    }
+
+    // Emit the next batch of rows for a table-level COUNT. Each call materializes at most one
+    // batch and decrements the remaining count. Once the count is exhausted, the counter is
+    // reset to -1, the current task is released and `*eos` is set to true.
+    Status _read_table_level_count(Block* block, bool* eos) {
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(eos != nullptr);
+        DORIS_CHECK(_push_down_agg_type == TPushAggOp::type::COUNT);
+        DORIS_CHECK(_remaining_table_level_count >= 0);
+
+        const bool exhausted = _remaining_table_level_count == 0;
+        if (exhausted) {
+            _remaining_table_level_count = -1;
+            _current_task.reset();
+            *eos = true;
+            return Status::OK();
+        }
+
+        // Without a runtime state there is no batch size, so everything is emitted at once.
+        int64_t batch_limit = _remaining_table_level_count;
+        if (_runtime_state != nullptr) {
+            batch_limit = static_cast<int64_t>(_runtime_state->batch_size());
+        }
+        const auto emitted = std::min(_remaining_table_level_count, batch_limit);
+        RETURN_IF_ERROR(_materialize_count_rows(cast_set<size_t>(emitted), block));
+        _remaining_table_level_count -= emitted;
+        *eos = false;
+        return Status::OK();
+    }
+
+    // Add `column_id` to the scan request through FileScanRequestBuilder. `scan_columns` must
+    // point at either the request's predicate or non-predicate column list and selects which
+    // builder call is used. A builder failure is treated as fatal. When the row position column
+    // is added and the file schema does not define it yet, its definition is appended.
+    void _append_file_scan_column(FileScanRequest* request, LocalColumnId column_id,
+                                  std::vector<LocalColumnIndex>* scan_columns) {
+        DORIS_CHECK(request != nullptr);
+        DORIS_CHECK(scan_columns != nullptr);
+        FileScanRequestBuilder builder(request);
+        const bool targets_predicates = scan_columns == &request->predicate_columns;
+        if (!targets_predicates) {
+            DORIS_CHECK(scan_columns == &request->non_predicate_columns);
+        }
+        const Status status = targets_predicates ? builder.add_predicate_column(column_id)
+                                                 : builder.add_non_predicate_column(column_id);
+        DORIS_CHECK(status.ok()) << status.to_string();
+
+        const bool is_row_position = column_id == LocalColumnId(ROW_POSITION_COLUMN_ID);
+        if (is_row_position &&
+            _find_column_definition(_data_reader.file_schema, column_id) == nullptr) {
+            _data_reader.file_schema.push_back(row_position_column_definition());
+        }
+    }
+
+    // Append DeletePredicate to file scan request if there are deletes. The predicate will be
+    // evaluated in file reader level and filter out deleted rows before returning data to table
+    // reader. Without deleted rows the request is left unchanged.
+    Status _append_delete_predicate(FileScanRequest* request) {
+        DORIS_CHECK(request != nullptr);
+        const bool has_deletes = _delete_rows != nullptr && !_delete_rows->empty();
+        if (!has_deletes) {
+            return Status::OK();
+        }
+
+        // The predicate reads the row position column, so schedule it as a predicate column.
+        const auto row_position_id = LocalColumnId(ROW_POSITION_COLUMN_ID);
+        _append_file_scan_column(request, row_position_id, &request->predicate_columns);
+
+        auto delete_predicate = std::make_shared<DeletePredicate>(*_delete_rows);
+        const auto slot_index =
+                cast_set<int>(request->local_positions.at(row_position_id).value());
+        auto row_position_ref =
+                VSlotRef::create_shared(slot_index, slot_index, -1,
+                                        std::make_shared<DataTypeInt64>(), ROW_POSITION_COLUMN_NAME);
+        delete_predicate->add_child(std::move(row_position_ref));
+
+        request->delete_conjuncts.push_back(
+                VExprContext::create_shared(std::move(delete_predicate)));
+        return Status::OK();
+    }
+
+    // Close the current concrete reader. This hook is called by both create_next_reader() and
+    // close(), so it should remain idempotent.
+    //
+    // Closes and releases the file reader, clears the column mapper, the table filters and the
+    // per-file schema/layout/template state, and resets the current task, file description and
+    // EOF flag. If closing the file reader fails, that status is returned and the remaining
+    // state is left as it was.
+    virtual Status close_current_reader() {
+        _finalize_reader_condition_cache();
+        // An earlier call already released the reader (open_reader() itself closes it when a
+        // constant filter prunes the split), so a repeated call must not dereference it.
+        if (_data_reader.reader != nullptr) {
+            RETURN_IF_ERROR(_data_reader.reader->close());
+            _data_reader.reader.reset();
+        }
+        if (_data_reader.column_mapper != nullptr) {
+            _data_reader.column_mapper->clear();
+            _data_reader.column_mapper.reset();
+        }
+        _table_filters.clear();
+        _data_reader.file_schema.clear();
+        _data_reader.file_block_layout.clear();
+        _data_reader.block_template.clear();
+        _current_task.reset();
+        _current_file_description.reset();
+        _current_reader_reached_eof = false;
+        return Status::OK();
+    }
+
+    // Finalize file-local block to table/global schema block: every column mapping is
+    // materialized from the block template into the block position with the same index, then
+    // virtual columns are materialized and CHAR/VARCHAR lengths are enforced.
+    Status finalize_chunk(Block* block, const size_t rows) {
+        SCOPED_TIMER(_profile.finalize_timer);
+        const auto& mappings = _data_reader.column_mapper->mappings();
+        for (size_t position = 0; position < mappings.size(); ++position) {
+            ColumnPtr materialized;
+            RETURN_IF_ERROR(_materialize_mapping_column(
+                    mappings[position], &_data_reader.block_template, rows, &materialized));
+            block->replace_by_position(position, IColumn::mutate(std::move(materialized)));
+        }
+        RETURN_IF_ERROR(materialize_virtual_columns(block));
+        // Enforce CHAR/VARCHAR length declared by the table schema after all file-to-table
+        // materialization has finished.
+        RETURN_IF_ERROR(_truncate_char_or_varchar_columns(block));
+        return Status::OK();
+    }
+
+    // Materialize virtual columns in the table block, such as Iceberg _row_id and
+    // _last_updated_sequence_number. This runs after normal column materialization so finalize
+    // expressions can reference those virtual columns.
+    // The base implementation does nothing and returns OK.
+    virtual Status materialize_virtual_columns(Block* table_block) { return Status::OK(); }
+
+#ifndef NDEBUG
+    // Debug-build validation of the file-local block template. For every column it verifies
+    // that the column pointer is set, runs the column's sanity check and verifies that the
+    // column matches its declared type. The first problem, including any exception thrown by
+    // those checks, is logged as a warning and returned as an InternalError that names the
+    // column, `stage` and the reader state. `rows` is only reported in the message.
+    Status _check_file_block_columns(std::string_view stage, size_t rows) {
+        DORIS_CHECK(_data_reader.block_template.columns() == _data_reader.file_block_layout.size());
+        // Logs a failure and hands it back so call sites can return it directly.
+        const auto report = [](Status failure) {
+            LOG(WARNING) << failure;
+            return failure;
+        };
+        for (size_t idx = 0; idx < _data_reader.block_template.columns(); ++idx) {
+            const auto& layout_entry = _data_reader.file_block_layout[idx];
+            const auto& template_entry = _data_reader.block_template.get_by_position(idx);
+            const auto* column = template_entry.column.get();
+            // Single builder for the failures that describe the column itself; `error` is the
+            // text of the underlying problem.
+            const auto column_failure = [&](const auto& error) {
+                return Status::InternalError(
+                        "Invalid file block column {} at {}: file_column_id={}, name='{}', "
+                        "type={}, column={}, column_size={}, expected_rows={}, error={}, "
+                        "reader={}",
+                        idx, stage, layout_entry.file_column_id.value(), layout_entry.name,
+                        layout_entry.type == nullptr ? "null" : layout_entry.type->get_name(),
+                        column == nullptr ? "null" : column->get_name(),
+                        column == nullptr ? 0 : column->size(), rows, error, debug_string());
+            };
+            try {
+                if (column == nullptr) {
+                    return report(Status::InternalError(
+                            "Invalid file block column {} at {}: file_column_id={}, name='{}', "
+                            "type={}, column=null, expected_rows={}, reader={}",
+                            idx, stage, layout_entry.file_column_id.value(), layout_entry.name,
+                            layout_entry.type == nullptr ? "null" : layout_entry.type->get_name(),
+                            rows, debug_string()));
+                }
+                column->sanity_check();
+                const auto match_status = template_entry.check_type_and_column_match();
+                if (!match_status.ok()) {
+                    return report(column_failure(match_status.to_string()));
+                }
+            } catch (const Exception& e) {
+                return report(column_failure(e.to_string()));
+            } catch (const std::exception& e) {
+                return report(column_failure(e.what()));
+            }
+        }
+        return Status::OK();
+    }
+
+    // Debug-build validation of a table-schema block. The block must have one column per column
+    // mapping. For every column it verifies that the column pointer is set, runs the column's
+    // sanity check and verifies that the column matches its declared type. The first problem,
+    // including any exception thrown by those checks, is logged as a warning and returned as an
+    // InternalError that names the column, `stage` and the mapping. `rows` is only reported in
+    // the message.
+    Status _check_table_block_columns(std::string_view stage, const Block* block, size_t rows) {
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(block->columns() == _data_reader.column_mapper->mappings().size());
+        // Logs a failure and hands it back so call sites can return it directly.
+        const auto report = [](Status failure) {
+            LOG(WARNING) << failure;
+            return failure;
+        };
+        for (size_t idx = 0; idx < block->columns(); ++idx) {
+            const auto& mapping = _data_reader.column_mapper->mappings()[idx];
+            const auto& block_entry = block->get_by_position(idx);
+            const auto* column = block_entry.column.get();
+            // Single builder for the failures that describe the column itself; `error` is the
+            // text of the underlying problem.
+            const auto column_failure = [&](const auto& error) {
+                return Status::InternalError(
+                        "Invalid table block column {} at {}: table_column='{}', global_index={}, "
+                        "type={}, column={}, column_size={}, expected_rows={}, error={}, "
+                        "mapping={}",
+                        idx, stage, mapping.table_column_name, mapping.global_index.value(),
+                        mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(),
+                        column == nullptr ? "null" : column->get_name(),
+                        column == nullptr ? 0 : column->size(), rows, error,
+                        mapping.debug_string());
+            };
+            try {
+                if (column == nullptr) {
+                    return report(Status::InternalError(
+                            "Invalid table block column {} at {}: table_column='{}', "
+                            "global_index={}, type={}, column=null, expected_rows={}, mapping={}",
+                            idx, stage, mapping.table_column_name, mapping.global_index.value(),
+                            mapping.table_type == nullptr ? "null" : mapping.table_type->get_name(),
+                            rows, mapping.debug_string()));
+                }
+                column->sanity_check();
+                const auto match_status = block_entry.check_type_and_column_match();
+                if (!match_status.ok()) {
+                    return report(column_failure(match_status.to_string()));
+                }
+            } catch (const Exception& e) {
+                return report(column_failure(e.to_string()));
+            } catch (const std::exception& e) {
+                return report(column_failure(e.what()));
+            }
+        }
+        return Status::OK();
+    }
+#endif
+
+    // Apply CHAR/VARCHAR truncation to every column of `block` whose mapping asks for it. This
+    // is a no-op unless a runtime state exists and its query options enable
+    // `truncate_char_or_varchar_columns`. The block must have one column per column mapping.
+    Status _truncate_char_or_varchar_columns(Block* block) {
+        DORIS_CHECK(block != nullptr);
+        const bool truncation_enabled =
+                _runtime_state != nullptr &&
+                _runtime_state->query_options().truncate_char_or_varchar_columns;
+        if (!truncation_enabled) {
+            return Status::OK();
+        }
+        const auto& mappings = _data_reader.column_mapper->mappings();
+        DORIS_CHECK(block->columns() == mappings.size());
+        for (size_t position = 0; position < mappings.size(); ++position) {
+            const auto& mapping = mappings[position];
+            if (_should_truncate_char_or_varchar_column(mapping)) {
+                // The declared length lives on the string type underneath any Nullable wrapper.
+                const auto bare_table_type = remove_nullable(mapping.table_type);
+                const auto target_len =
+                        assert_cast<const DataTypeString*>(bare_table_type.get())->len();
+                _truncate_char_or_varchar_column(block, position, target_len);
+            }
+        }
+        return Status::OK();
+    }
+
+    // Return true when the table schema has a bounded CHAR/VARCHAR length that is stricter than
+    // the file-side type. Examples:
+    // - table VARCHAR(10), file VARCHAR(20): truncate to 10;
+    // - table VARCHAR(10), file STRING: truncate to 10 because STRING has no declared bound;
+    // - table STRING, any file type: no truncation because the target has no bound.
+    static bool _should_truncate_char_or_varchar_column(const ColumnMapping& mapping) {
+        if (mapping.table_type == nullptr) {
+            return false;
+        }
+        const auto bare_table_type = remove_nullable(mapping.table_type);
+        const auto table_primitive = bare_table_type->get_primitive_type();
+        const bool bounded_target = table_primitive == TYPE_VARCHAR || table_primitive == TYPE_CHAR;
+        if (!bounded_target) {
+            return false;
+        }
+        const auto target_len = assert_cast<const DataTypeString*>(bare_table_type.get())->len();
+        if (target_len <= 0) {
+            return false;
+        }
+        // Without a file-side type there is no bound to compare against, so always truncate.
+        if (mapping.file_type == nullptr) {
+            return true;
+        }
+        const auto bare_file_type = remove_nullable(mapping.file_type);
+        DORIS_CHECK(bare_file_type != nullptr);
+        const auto file_primitive = bare_file_type->get_primitive_type();
+        const bool file_is_string_like = file_primitive == TYPE_VARCHAR ||
+                                         file_primitive == TYPE_CHAR ||
+                                         file_primitive == TYPE_STRING;
+        // A file type that is not string-like carries no length, which counts as unbounded.
+        if (!file_is_string_like) {
+            return true;
+        }
+        const int file_len = assert_cast<const DataTypeString*>(bare_file_type.get())->len();
+        return file_len < 0 || target_len < file_len;
+    }
+
+    // Truncate a materialized CHAR/VARCHAR column in place by reusing the vectorized substring
+    // implementation: substring(column, 1, len). Nullable columns are unwrapped before substring
+    // execution and wrapped back with the original null map afterward, because substring operates
+    // on the nested string payload only.
+    //
+    // `idx` is the position of the column in `block`. The block temporarily grows by three
+    // columns (two int arguments and the result) and is cut back to its original column count
+    // before returning.
+    static void _truncate_char_or_varchar_column(Block* block, size_t idx, int len) {
+        DORIS_CHECK(block != nullptr);
+        auto int_type = std::make_shared<DataTypeInt32>();
+        // Column count before the temporaries are appended; the appended columns start here.
+        const auto num_columns_without_result = cast_set<uint32_t>(block->columns());
+        auto& target = block->get_by_position(idx);
+        const bool is_nullable = target.type->is_nullable();
+        ColumnPtr input_column = target.column;
+        ColumnPtr null_map_column;
+        if (is_nullable) {
+            // Keep the null map aside and feed only the nested string column to substring.
+            const auto* nullable_column = assert_cast<const ColumnNullable*>(target.column.get());
+            input_column = nullable_column->get_nested_column_ptr();
+            null_map_column = nullable_column->get_null_map_column_ptr();
+        }
+        block->replace_by_position(idx, std::move(input_column));
+        // Appended argument columns: start position 1 and length `len`.
+        block->insert({int_type->create_column_const(block->rows(), to_field<TYPE_INT>(1)),
+                       int_type, "const 1"});
+        block->insert({int_type->create_column_const(block->rows(), to_field<TYPE_INT>(len)),
+                       int_type, "const len"});
+        // Placeholder that receives the substring result.
+        block->insert({nullptr, std::make_shared<DataTypeString>(), "result"});
+
+        // Arguments are block positions: the string column, then the two appended constants.
+        ColumnNumbers temp_arguments(3);
+        temp_arguments[0] = cast_set<uint32_t>(idx);
+        temp_arguments[1] = num_columns_without_result;
+        temp_arguments[2] = num_columns_without_result + 1;
+        const uint32_t result_column_id = num_columns_without_result + 2;
+        SubstringUtil::substring_execute(*block, temp_arguments, result_column_id, block->rows());
+
+        ColumnPtr result_column = block->get_by_position(result_column_id).column;
+        if (is_nullable) {
+            // Re-attach the original null map to the truncated payload.
+            result_column = ColumnNullable::create(std::move(result_column), null_map_column);
+        }
+        block->replace_by_position(idx, std::move(result_column));
+        // NOTE(review): presumably erase_tail drops the columns from this position onward,
+        // i.e. the three temporaries - confirm against Block::erase_tail.
+        block->erase_tail(num_columns_without_result);
+    }
+
+    // Try to produce the rows for the pushed-down aggregate from the file reader's aggregate
+    // result instead of a normal scan. `*pushed_down` is set to true only when rows were
+    // materialized into `block`; in that case the current reader is closed as well. When
+    // pushdown is not supported, or the reader reports NOT_IMPLEMENTED_ERROR, the function
+    // returns OK with `*pushed_down` left false.
+    Status _try_materialize_aggregate_pushdown_rows(Block* block, bool* pushed_down) {
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(pushed_down != nullptr);
+        *pushed_down = false;
+        block->clear_column_data(_projected_columns.size());
+        // Recorded before any check, so the attempt counts even if pushdown is not possible.
+        _aggregate_pushdown_tried = true;
+        if (!_supports_aggregate_pushdown(_push_down_agg_type)) {
+            return Status::OK();
+        }
+
+        FileAggregateRequest file_request;
+        RETURN_IF_ERROR(_build_file_aggregate_request(_push_down_agg_type, &file_request));
+        FileAggregateResult file_result;
+        const auto status = _data_reader.reader->get_aggregate_result(file_request, &file_result);
+        // A reader without aggregate support is not an error here: report "not pushed down".
+        if (status.is<ErrorCode::NOT_IMPLEMENTED_ERROR>()) {
+            return Status::OK();
+        }
+        RETURN_IF_ERROR(status);
+        RETURN_IF_ERROR(
+                _materialize_aggregate_pushdown_rows(_push_down_agg_type, file_result, block));
+        // The flag is set before the reader is closed, so it stays true even if close fails.
+        *pushed_down = true;
+        RETURN_IF_ERROR(close_current_reader());
+        return Status::OK();
+    }
+
+    // Decide whether `agg_type` can be answered by aggregate pushdown for the current reader.
+    virtual bool _supports_aggregate_pushdown(TPushAggOp::type agg_type) const {
+        // Only COUNT and MIN/MAX can be push down.
+        const bool is_count = agg_type == TPushAggOp::type::COUNT;
+        const bool is_minmax = agg_type == TPushAggOp::type::MINMAX;
+        if (!is_count && !is_minmax) {
+            return false;
+        }
+        // Only support aggregate pushdown when there is no delete, filter and column predicate, so
+        // the reduced rows consumed by the upper aggregate remain semantically equivalent to a
+        // normal scan.
+        const bool has_deletes = _delete_rows != nullptr && !_delete_rows->empty();
+        const bool has_filters = !_table_filters.empty() || !_table_column_predicates.empty();
+        if (has_deletes || has_filters) {
+            return false;
+        }
+        if (is_count) {
+            return true;
+        }
+        // For MIN/MAX, only support direct file-to-table column mappings. The two emitted rows
+        // must be enough for the upper MIN/MAX aggregate without evaluating default expressions or
+        // virtual columns.
+        const auto& mappings = _data_reader.column_mapper->mappings();
+        return std::all_of(mappings.begin(), mappings.end(), [&](const auto& mapping) {
+            const bool is_direct = mapping.file_local_id.has_value() &&
+                                   mapping.virtual_column_type == TableVirtualColumnType::INVALID &&
+                                   mapping.default_expr == nullptr &&
+                                   mapping.file_type != nullptr && mapping.table_type != nullptr;
+            return is_direct && _can_push_down_minmax_for_mapping(mapping);
+        });
+    }
+
+    // Pass a non-null column through IColumn::mutate and return the result as a ColumnPtr.
+    // NOTE(review): presumably this yields a column that is no longer shared with other owners -
+    // confirm against IColumn::mutate.
+    static ColumnPtr _detach_column(ColumnPtr column) {
+        DORIS_CHECK(column.get() != nullptr);
+        return IColumn::mutate(std::move(column));
+    }
+
+    // Recursively rewrites `*column` so that its nullability matches `table_type` at every
+    // nesting level: the top-level nullable wrapper, array elements, map keys/values and struct
+    // fields.
+    //
+    // column:     in/out column; must point at a non-null column.
+    // table_type: target type; must not be null.
+    //
+    // Returns InternalError when a nullable column that actually contains NULLs has to become
+    // non-nullable; otherwise Status::OK().
+    static Status _align_column_nullability(ColumnPtr* column, const DataTypePtr& table_type) {
+        DORIS_CHECK(column != nullptr);
+        DORIS_CHECK(column->get() != nullptr);
+        DORIS_CHECK(table_type != nullptr);
+        // Expand a const column first: the branches below cast the column to its concrete
+        // nullable/array/map/struct class.
+        *column = (*column)->convert_to_full_column_if_const();
+        if (table_type->is_nullable()) {
+            const auto& nested_type =
+                    assert_cast<const DataTypeNullable&>(*table_type).get_nested_type();
+            if (!(*column)->is_nullable()) {
+                // Target is nullable but the column is not: align the value against the nested
+                // type, then add the nullable wrapper.
+                RETURN_IF_ERROR(_align_column_nullability(column, nested_type));
+                *column = make_nullable(*column);
+                return Status::OK();
+            }
+            // Both sides are nullable: keep the existing null map and only align the nested
+            // column.
+            const auto& nullable_column = assert_cast<const ColumnNullable&>(**column);
+            ColumnPtr nested_column = nullable_column.get_nested_column_ptr();
+            RETURN_IF_ERROR(_align_column_nullability(&nested_column, nested_type));
+            *column = ColumnNullable::create(nested_column,
+                                             nullable_column.get_null_map_column_ptr());
+            return Status::OK();
+        }
+        if ((*column)->is_nullable()) {
+            // Target is not nullable: the wrapper can only be dropped when no row is NULL.
+            const auto& nullable_column = assert_cast<const ColumnNullable&>(**column);
+            if (nullable_column.has_null()) {
+                // NOTE(review): this message mentions a default expression, but the function is
+                // also called for file child columns by
+                // _materialize_present_child_mapping_column.
+                return Status::InternalError(
+                        "Default expression produced NULL for non-nullable table column");
+            }
+            ColumnPtr nested_column = nullable_column.get_nested_column_ptr();
+            RETURN_IF_ERROR(_align_column_nullability(&nested_column, table_type));
+            *column = nested_column;
+            return Status::OK();
+        }
+        // Neither side is nullable at this level: descend into nested types and rebuild the
+        // container around the aligned children, reusing the original offsets.
+        if (const auto* array_type = typeid_cast<const DataTypeArray*>(table_type.get())) {
+            const auto& array_column = assert_cast<const ColumnArray&>(**column);
+            ColumnPtr nested_column = array_column.get_data_ptr();
+            RETURN_IF_ERROR(
+                    _align_column_nullability(&nested_column, array_type->get_nested_type()));
+            *column = ColumnArray::create(nested_column, array_column.get_offsets_ptr());
+            return Status::OK();
+        }
+        if (const auto* map_type = typeid_cast<const DataTypeMap*>(table_type.get())) {
+            const auto& map_column = assert_cast<const ColumnMap&>(**column);
+            ColumnPtr key_column = map_column.get_keys_ptr();
+            ColumnPtr value_column = map_column.get_values_ptr();
+            RETURN_IF_ERROR(_align_column_nullability(&key_column, map_type->get_key_type()));
+            RETURN_IF_ERROR(_align_column_nullability(&value_column, map_type->get_value_type()));
+            *column = ColumnMap::create(key_column, value_column, map_column.get_offsets_ptr());
+            return Status::OK();
+        }
+        if (const auto* struct_type = typeid_cast<const DataTypeStruct*>(table_type.get())) {
+            const auto& struct_column = assert_cast<const ColumnStruct&>(**column);
+            Columns columns = struct_column.get_columns_copy();
+            // Fields are aligned positionally, so the column and the type must agree on arity.
+            DORIS_CHECK(columns.size() == struct_type->get_elements().size());
+            for (size_t i = 0; i < columns.size(); ++i) {
+                RETURN_IF_ERROR(
+                        _align_column_nullability(&columns[i], struct_type->get_element(i)));
+            }
+            *column = ColumnStruct::create(columns);
+            return Status::OK();
+        }
+        // Any other non-nullable type is left unchanged.
+        return Status::OK();
+    }
+
+    // Evaluates `default_expr` over `block` by calling execute_column_impl() on the expression
+    // root, then fills `result_data` with the produced column, the expression's execute type
+    // and its name. Returns InternalError when the produced column does not have exactly
+    // block->rows() rows. All three pointer arguments must be non-null.
+    static Status _execute_default_expr_without_root_type_check(
+            const VExprContextSPtr& default_expr, const Block* block,
+            ColumnWithTypeAndName* result_data) {
+        DORIS_CHECK(default_expr != nullptr);
+        DORIS_CHECK(block != nullptr);
+        DORIS_CHECK(result_data != nullptr);
+        ColumnPtr evaluated;
+        Status exec_status;
+        RETURN_IF_CATCH_EXCEPTION({
+            exec_status = default_expr->root()->execute_column_impl(
+                    default_expr.get(), block, nullptr, block->rows(), evaluated);
+        });
+        RETURN_IF_ERROR(exec_status);
+        DORIS_CHECK(evaluated.get() != nullptr);
+        const bool row_count_matches = evaluated->size() == block->rows();
+        if (!row_count_matches) {
+            return Status::InternalError(
+                    "Default expr {} return column size {} not equal to expected size {}",
+                    default_expr->expr_name(), evaluated->size(), block->rows());
+        }
+        result_data->column = std::move(evaluated);
+        result_data->type = default_expr->execute_type(block);
+        result_data->name = default_expr->expr_name();
+        return Status::OK();
+    }
+
+    // Converts `*column` from `file_type` to `table_type` by running a Cast expression over a
+    // temporary one-column block named `column_name`. Returns immediately, leaving `*column`
+    // untouched, when the two types are equal.
+    Status _cast_column_to_type(ColumnPtr* column, const DataTypePtr& file_type,
+                                const DataTypePtr& table_type,
+                                const std::string& column_name) const {
+        DORIS_CHECK(column != nullptr);
+        DORIS_CHECK(column->get() != nullptr);
+        DORIS_CHECK(file_type != nullptr);
+        DORIS_CHECK(table_type != nullptr);
+        if (file_type->equals(*table_type)) {
+            return Status::OK();
+        }
+
+        // Describe a nullable column with a nullable source type even when the declared file
+        // type is not nullable.
+        const bool wrap_source_type = (*column)->is_nullable() && !file_type->is_nullable();
+        DataTypePtr source_type = wrap_source_type ? make_nullable(file_type) : file_type;
+
+        Block source_block;
+        source_block.insert({*column, source_type, column_name});
+
+        // Expression tree: Cast(table_type) over a slot reference to the block's only column.
+        auto source_slot = VSlotRef::create_shared(0, 0, -1, source_type, column_name);
+        auto cast_root = Cast::create_shared(table_type);
+        cast_root->add_child(std::move(source_slot));
+        auto cast_context = VExprContext::create_shared(std::move(cast_root));
+
+        RowDescriptor empty_row_desc;
+        RETURN_IF_ERROR(cast_context->prepare(_runtime_state, empty_row_desc));
+        RETURN_IF_ERROR(cast_context->open(_runtime_state));
+        ColumnPtr converted;
+        RETURN_IF_ERROR(cast_context->execute(&source_block, converted));
+        *column = std::move(converted);
+        return Status::OK();
+    }
+
+    // Converts a child column that is present in the file into its table representation.
+    // A non-trivial mapping is either rebuilt through _materialize_complex_mapping_column (when
+    // it has child mappings) or cast from the file type to the table type. In every case the
+    // result is finally aligned to the nullability of the table type.
+    Status _materialize_present_child_mapping_column(const ColumnMapping& mapping,
+                                                     const ColumnPtr& file_column,
+                                                     const size_t rows, ColumnPtr* column) {
+        DORIS_CHECK(column != nullptr);
+        DORIS_CHECK(mapping.file_type != nullptr);
+        DORIS_CHECK(mapping.table_type != nullptr);
+        *column = file_column;
+        const bool has_children = !mapping.child_mappings.empty();
+        if (!mapping.is_trivial && has_children) {
+            RETURN_IF_ERROR(_materialize_complex_mapping_column(mapping, *column, rows, column));
+        } else if (!mapping.is_trivial) {
+            RETURN_IF_ERROR(_cast_column_to_type(column, mapping.file_type, mapping.table_type,
+                                                 mapping.file_column_name));
+        }
+        return _align_column_nullability(column, mapping.table_type);
+    }
+
+    // Produces the table column for a top-level `mapping` and stores it in `*column`.
+    //
+    // The source is chosen in this order:
+    //   1. non-trivial mapping of a present file column with child mappings: run the projection
+    //      and rebuild the nested value through _materialize_complex_mapping_column;
+    //   2. any other mapping with a projection: run it and take its result column;
+    //   3. mapping with a default expression: evaluate it over `current_block` when that block
+    //      already has `rows` rows, otherwise over a one-column helper block of `rows` rows;
+    //   4. otherwise: `rows` copies of the table type's default value.
+    //
+    // current_block: block the projections are executed against; must not be null.
+    // rows:          number of rows the resulting column is expected to have.
+    // column:        output; must not be null.
+    Status _materialize_mapping_column(const ColumnMapping& mapping, Block* current_block,
+                                       const size_t rows, ColumnPtr* column) {
+        DORIS_CHECK(current_block != nullptr);
+        DORIS_CHECK(column != nullptr);
+        if (!mapping.is_trivial && mapping.file_local_id.has_value() &&
+            !mapping.child_mappings.empty()) {
+            // DORIS_CHECK rather than DCHECK: the projection is dereferenced unconditionally on
+            // the next lines, and DCHECK is compiled out of release builds.
+            DORIS_CHECK(mapping.projection != nullptr);
+            int res_id = -1;
+            auto st = mapping.projection->execute(current_block, &res_id);
+            if (!st.ok()) {
+                return Status::InternalError(
+                        "Failed to execute complex mapping projection for table column '{}' "
+                        "(global_index={}, file_local_id={}, rows={}): {}, mapping={}",
+                        mapping.table_column_name, mapping.global_index.value(),
+                        *mapping.file_local_id, rows, st.to_string(), mapping.debug_string());
+            }
+            ColumnPtr result_column = current_block->get_by_position(res_id).column;
+            RETURN_IF_ERROR(
+                    _materialize_complex_mapping_column(mapping, result_column, rows, column));
+            return Status::OK();
+        }
+        if (mapping.projection != nullptr) {
+            int res_id = -1;
+            auto st = mapping.projection->execute(current_block, &res_id);
+            if (!st.ok()) {
+                std::string file_local_id = "null";
+                if (mapping.file_local_id.has_value()) {
+                    file_local_id = std::to_string(*mapping.file_local_id);
+                }
+                return Status::InternalError(
+                        "Failed to execute mapping projection for table column '{}' "
+                        "(global_index={}, file_local_id={}, rows={}): {}, mapping={}",
+                        mapping.table_column_name, mapping.global_index.value(), file_local_id,
+                        rows, st.to_string(), mapping.debug_string());
+            }
+            ColumnPtr result_column = current_block->get_by_position(res_id).column;
+            *column = _detach_column(std::move(result_column));
+            return Status::OK();
+        }
+        if (mapping.default_expr != nullptr) {
+            // When the current block does not carry `rows` rows, evaluate the expression over a
+            // helper block holding a single const column with the requested row count.
+            Block eval_block;
+            const Block* input_block = current_block;
+            if (current_block->rows() != rows) {
+                DORIS_CHECK(mapping.constant_index.has_value());
+                eval_block.insert({mapping.table_type->create_column_const_with_default_value(rows),
+                                   mapping.table_type, "__table_reader_const_rows"});
+                input_block = &eval_block;
+            }
+            ColumnWithTypeAndName result;
+            RETURN_IF_ERROR(_execute_default_expr_without_root_type_check(mapping.default_expr,
+                                                                          input_block, &result));
+            ColumnPtr result_column = result.column;
+            RETURN_IF_ERROR(_align_column_nullability(&result_column, mapping.table_type));
+            *column = _detach_column(std::move(result_column));
+            return Status::OK();
+        }
+        ColumnPtr result_column = mapping.table_type->create_column_const_with_default_value(rows);
+        *column = _detach_column(std::move(result_column));
+        return Status::OK();
+    }
+
+    // Dispatches on the primitive type of the (nullable-stripped) table type to the struct,
+    // array or map materializer. For any other type the file column is passed through
+    // _detach_column unchanged.
+    Status _materialize_complex_mapping_column(const ColumnMapping& mapping,
+                                               const ColumnPtr& file_column, const size_t rows,
+                                               ColumnPtr* column) {
+        DORIS_CHECK(mapping.table_type != nullptr);
+        DORIS_CHECK(file_column.get() != nullptr);
+        const auto value_type = remove_nullable(mapping.table_type);
+        switch (value_type->get_primitive_type()) {
+        case TYPE_STRUCT:
+            return _materialize_struct_mapping_column(mapping, file_column, rows, column);
+        case TYPE_ARRAY:
+            return _materialize_array_mapping_column(mapping, file_column, rows, column);
+        case TYPE_MAP:
+            return _materialize_map_mapping_column(mapping, file_column, rows, column);
+        default:
+            *column = _detach_column(file_column);
+            return Status::OK();
+        }
+    }
+
+    // Returns pointers to the child mappings that carry a file_local_id, sorted by ascending
+    // file_local_id. The pointers refer to elements of `child_mappings`.
+    static std::vector<const ColumnMapping*> _present_child_mappings_in_file_order(
+            const std::vector<ColumnMapping>& child_mappings) {
+        std::vector<const ColumnMapping*> present;
+        present.reserve(child_mappings.size());
+        for (const auto& candidate : child_mappings) {
+            if (!candidate.file_local_id.has_value()) {
+                continue;
+            }
+            present.push_back(&candidate);
+        }
+        const auto by_file_local_id = [](const ColumnMapping* lhs, const ColumnMapping* rhs) {
+            DORIS_CHECK(lhs->file_local_id.has_value());
+            DORIS_CHECK(rhs->file_local_id.has_value());
+            return *lhs->file_local_id < *rhs->file_local_id;
+        };
+        std::ranges::sort(present, by_file_local_id);
+        return present;
+    }
+
+    // Returns the position of `child_mapping` among the children of the file column.
+    // When the parent mapping lists projected_file_children, the position is that of the entry
+    // with the same file_local_id; otherwise it is the index of `child_mapping` inside
+    // `file_ordered_children`. The child must carry a file_local_id and must be found.
+    static size_t _file_child_ordinal_for_mapping(
+            const ColumnMapping& mapping, const ColumnMapping& child_mapping,
+            const std::vector<const ColumnMapping*>& file_ordered_children) {
+        DORIS_CHECK(child_mapping.file_local_id.has_value());
+        const auto& projected = mapping.projected_file_children;
+        if (projected.empty()) {
+            const auto pos = std::ranges::find(file_ordered_children, &child_mapping);
+            DORIS_CHECK(pos != file_ordered_children.end());
+            return static_cast<size_t>(std::distance(file_ordered_children.begin(), pos));
+        }
+        const auto pos = std::ranges::find_if(projected, [&](const ColumnDefinition& file_child) {
+            return file_child.file_local_id() == *child_mapping.file_local_id;
+        });
+        DORIS_CHECK(pos != projected.end());
+        return static_cast<size_t>(std::distance(projected.begin(), pos));
+    }
+
+    // Returns one child mapping per element of the struct `table_type`, in element order.
+    // Children are matched by comparing table_column_name with the element name; every element
+    // must have a matching child mapping.
+    static std::vector<const ColumnMapping*> _child_mappings_in_table_type_order(
+            const ColumnMapping& mapping, const DataTypeStruct& table_type) {
+        const size_t element_count = table_type.get_elements().size();
+        std::vector<const ColumnMapping*> ordered;
+        ordered.reserve(mapping.child_mappings.size());
+        for (size_t element_idx = 0; element_idx < element_count; ++element_idx) {
+            const auto& element_name = table_type.get_element_name(element_idx);
+            const auto has_element_name = [&](const ColumnMapping& candidate) {
+                return candidate.table_column_name == element_name;
+            };
+            const auto match = std::ranges::find_if(mapping.child_mappings, has_element_name);
+            DORIS_CHECK(match != mapping.child_mappings.end())
+                    << mapping.debug_string() << ", table_child_name=" << element_name;
+            ordered.push_back(&*match);
+        }
+        return ordered;
+    }
+
+    // Returns the nested column when `column` is a ColumnNullable, otherwise `column` itself.
+    // For a nullable column, `*null_map` (when `null_map` is not null) is set to its null map
+    // data; for a non-nullable column `*null_map` is left untouched.
+    static const IColumn* _nested_column_if_nullable(const ColumnPtr& column,
+                                                     const NullMap** null_map) {
+        DORIS_CHECK(column.get() != nullptr);
+        const auto* as_nullable = check_and_get_column<ColumnNullable>(*column);
+        if (as_nullable == nullptr) {
+            return column.get();
+        }
+        if (null_map != nullptr) {
+            *null_map = &as_nullable->get_null_map_data();
+        }
+        return &as_nullable->get_nested_column();
+    }
+
+    // Builds the table STRUCT column for `mapping` out of the file struct column.
+    // Fields are emitted in table-type order. A field without a file_local_id is filled with
+    // `rows` default values of its table type; a present field is taken from the file struct
+    // and converted with _materialize_present_child_mapping_column. When the table type is
+    // nullable, the result is wrapped with the file column's null map, or with an all-zero null
+    // map when the file column is not nullable.
+    Status _materialize_struct_mapping_column(const ColumnMapping& mapping,
+                                              const ColumnPtr& file_column, const size_t rows,
+                                              ColumnPtr* column) {
+        DORIS_CHECK(mapping.table_type != nullptr);
+        const auto* struct_type =
+                assert_cast<const DataTypeStruct*>(remove_nullable(mapping.table_type).get());
+        const auto materialized_source = file_column->convert_to_full_column_if_const();
+        const NullMap* source_null_map = nullptr;
+        const auto* source_struct = assert_cast<const ColumnStruct*>(
+                _nested_column_if_nullable(materialized_source, &source_null_map));
+        DORIS_CHECK(struct_type->get_elements().size() == mapping.child_mappings.size());
+
+        const auto children_in_file_order =
+                _present_child_mappings_in_file_order(mapping.child_mappings);
+        const auto children_in_table_order =
+                _child_mappings_in_table_type_order(mapping, *struct_type);
+        MutableColumns table_children;
+        table_children.reserve(mapping.child_mappings.size());
+        for (const auto* child : children_in_table_order) {
+            DORIS_CHECK(child != nullptr);
+            ColumnPtr table_child;
+            if (child->file_local_id.has_value()) {
+                const auto ordinal =
+                        _file_child_ordinal_for_mapping(mapping, *child, children_in_file_order);
+                DORIS_CHECK(ordinal < source_struct->get_columns().size());
+                table_child = source_struct->get_column_ptr(ordinal);
+                RETURN_IF_ERROR(_materialize_present_child_mapping_column(*child, table_child,
+                                                                          rows, &table_child));
+            } else {
+                // The field does not exist in the file: use the table type's default value.
+                table_child = child->table_type->create_column_const_with_default_value(rows)
+                                      ->convert_to_full_column_if_const();
+            }
+            table_children.push_back(IColumn::mutate(std::move(table_child)));
+        }
+        auto result = ColumnStruct::create(std::move(table_children));
+        if (!mapping.table_type->is_nullable()) {
+            *column = std::move(result);
+            return Status::OK();
+        }
+
+        auto null_map = ColumnUInt8::create();
+        auto& null_flags = null_map->get_data();
+        null_flags.resize(rows);
+        if (source_null_map == nullptr) {
+            std::fill(null_flags.begin(), null_flags.end(), 0);
+        } else {
+            DORIS_CHECK(source_null_map->size() == rows);
+            null_flags.assign(source_null_map->begin(), source_null_map->end());
+        }
+        *column = ColumnNullable::create(std::move(result), std::move(null_map));
+        return Status::OK();
+    }
+
+    // Builds the table ARRAY column for `mapping`. The mapping must have exactly one child
+    // mapping, which converts the element column; the file array's offsets are reused. When
+    // the table type is nullable, the result is wrapped with the file column's null map, or
+    // with an all-zero null map when the file column is not nullable.
+    Status _materialize_array_mapping_column(const ColumnMapping& mapping,
+                                             const ColumnPtr& file_column, const size_t rows,
+                                             ColumnPtr* column) {
+        DORIS_CHECK(mapping.child_mappings.size() == 1);
+        const auto materialized_source = file_column->convert_to_full_column_if_const();
+        const NullMap* source_null_map = nullptr;
+        const auto* source_array = assert_cast<const ColumnArray*>(
+                _nested_column_if_nullable(materialized_source, &source_null_map));
+
+        // Elements are converted over the flattened element column, so the row count passed
+        // down is the element count, not `rows`.
+        ColumnPtr elements = source_array->get_data_ptr();
+        const auto& element_mapping = mapping.child_mappings[0];
+        RETURN_IF_ERROR(_materialize_present_child_mapping_column(element_mapping, elements,
+                                                                  elements->size(), &elements));
+        auto offsets = source_array->get_offsets_ptr()->convert_to_full_column_if_const();
+        auto result = ColumnArray::create(IColumn::mutate(std::move(elements)),
+                                          IColumn::mutate(std::move(offsets)));
+        if (!mapping.table_type->is_nullable()) {
+            *column = std::move(result);
+            return Status::OK();
+        }
+
+        auto null_map = ColumnUInt8::create();
+        auto& null_flags = null_map->get_data();
+        null_flags.resize(rows);
+        if (source_null_map == nullptr) {
+            std::fill(null_flags.begin(), null_flags.end(), 0);
+        } else {
+            DORIS_CHECK(source_null_map->size() == rows);
+            null_flags.assign(source_null_map->begin(), source_null_map->end());
+        }
+        *column = ColumnNullable::create(std::move(result), std::move(null_map));
+        return Status::OK();
+    }
+
+    // Builds the table MAP column for `mapping`. The child mapping with file_local_id 0
+    // converts the key column and the one with file_local_id 1 converts the value column; a
+    // side without such a child mapping is passed through unchanged. The file map's offsets
+    // are reused. When the table type is nullable, the result is wrapped with the file
+    // column's null map, or with an all-zero null map when the file column is not nullable.
+    Status _materialize_map_mapping_column(const ColumnMapping& mapping,
+                                           const ColumnPtr& file_column, const size_t rows,
+                                           ColumnPtr* column) {
+        const auto materialized_source = file_column->convert_to_full_column_if_const();
+        const NullMap* source_null_map = nullptr;
+        const auto* source_map = assert_cast<const ColumnMap*>(
+                _nested_column_if_nullable(materialized_source, &source_null_map));
+        ColumnPtr keys = source_map->get_keys_ptr();
+        ColumnPtr values = source_map->get_values_ptr();
+
+        // If several child mappings share an id, the last one wins.
+        const ColumnMapping* key_mapping = nullptr;
+        const ColumnMapping* value_mapping = nullptr;
+        for (const auto& child : mapping.child_mappings) {
+            if (child.file_local_id.has_value()) {
+                if (*child.file_local_id == 0) {
+                    key_mapping = &child;
+                } else if (*child.file_local_id == 1) {
+                    value_mapping = &child;
+                }
+            }
+        }
+
+        // Keys and values are converted over their flattened columns, so the row count passed
+        // down is the entry count, not `rows`.
+        if (key_mapping != nullptr) {
+            RETURN_IF_ERROR(_materialize_present_child_mapping_column(*key_mapping, keys,
+                                                                      keys->size(), &keys));
+        }
+        if (value_mapping != nullptr) {
+            RETURN_IF_ERROR(_materialize_present_child_mapping_column(*value_mapping, values,
+                                                                      values->size(), &values));
+        }
+        auto offsets = source_map->get_offsets_ptr()->convert_to_full_column_if_const();
+        auto result = ColumnMap::create(IColumn::mutate(std::move(keys)),
+                                        IColumn::mutate(std::move(values)),
+                                        IColumn::mutate(std::move(offsets)));
+        if (!mapping.table_type->is_nullable()) {
+            *column = std::move(result);
+            return Status::OK();
+        }
+
+        auto null_map = ColumnUInt8::create();
+        auto& null_flags = null_map->get_data();
+        null_flags.resize(rows);
+        if (source_null_map == nullptr) {
+            std::fill(null_flags.begin(), null_flags.end(), 0);
+        } else {
+            DORIS_CHECK(source_null_map->size() == rows);
+            null_flags.assign(source_null_map->begin(), source_null_map->end());
+        }
+        *column = ColumnNullable::create(std::move(result), std::move(null_map));
+        return Status::OK();
+    }
+
+    // Prepares and opens, against an empty RowDescriptor, the projection and the default
+    // expression of every column mapping that has one. Stops at the first failure.
+    Status _open_mapping_exprs() {
+        RowDescriptor empty_row_desc;
+        const auto prepare_and_open = [&](const auto& expr_ctx) -> Status {
+            if (expr_ctx == nullptr) {
+                return Status::OK();
+            }
+            RETURN_IF_ERROR(expr_ctx->prepare(_runtime_state, empty_row_desc));
+            RETURN_IF_ERROR(expr_ctx->open(_runtime_state));
+            return Status::OK();
+        };
+        for (const auto& mapping : _data_reader.column_mapper->mappings()) {
+            RETURN_IF_ERROR(prepare_and_open(mapping.projection));
+            RETURN_IF_ERROR(prepare_and_open(mapping.default_expr));
+        }
+        return Status::OK();
+    }
+
+    // Fills `request` with the aggregate type and the file columns the reader must aggregate.
+    // MIN/MAX requests carry one projection per column mapping (every mapping must have a
+    // file_local_id). COUNT requests carry at most one column, see below.
+    Status _build_file_aggregate_request(TPushAggOp::type agg_type,
+                                         FileAggregateRequest* request) const {
+        DORIS_CHECK(request != nullptr);
+        DORIS_CHECK(_supports_aggregate_pushdown(agg_type));
+        request->agg_type = agg_type;
+        request->columns.clear();
+        const auto& mappings = _data_reader.column_mapper->mappings();
+
+        if (agg_type != TPushAggOp::type::COUNT) {
+            request->columns.reserve(mappings.size());
+            for (const auto& mapping : mappings) {
+                DORIS_CHECK(mapping.file_local_id.has_value());
+                FileAggregateRequest::Column column;
+                column.projection =
+                        LocalColumnIndex::top_level(LocalColumnId(*mapping.file_local_id));
+                if (!mapping.child_mappings.empty()) {
+                    RETURN_IF_ERROR(build_aggregate_projection(mapping, &column.projection));
+                }
+                request->columns.push_back(std::move(column));
+            }
+            return Status::OK();
+        }
+
+        // COUNT pushdown historically meant COUNT(*) and therefore carried no columns. For a
+        // complex COUNT(col), materializing the full MAP/LIST/STRUCT value only to test the
+        // top-level NULL bit can be extremely expensive. When the scan projects exactly one
+        // directly-mapped complex column, that file column is passed to the reader so formats
+        // such as Parquet can count the column shape from metadata/levels without decoding
+        // payload values like MAP value strings. Every other COUNT case stays on the existing
+        // row-count path so count(*) semantics do not change.
+        if (mappings.size() != 1) {
+            return Status::OK();
+        }
+        const auto& mapping = mappings[0];
+        const bool directly_mapped = mapping.file_local_id.has_value() &&
+                                     mapping.virtual_column_type == TableVirtualColumnType::INVALID &&
+                                     mapping.default_expr == nullptr;
+        if (!directly_mapped || mapping.file_type == nullptr) {
+            return Status::OK();
+        }
+        if (!is_complex_type(remove_nullable(mapping.file_type)->get_primitive_type())) {
+            return Status::OK();
+        }
+        FileAggregateRequest::Column column;
+        column.projection = LocalColumnIndex::top_level(LocalColumnId(*mapping.file_local_id));
+        request->columns.push_back(std::move(column));
+        return Status::OK();
+    }
+
+    // Turns the reader's aggregate result into rows of `block` for the upper aggregate node.
+    //
+    // COUNT: the pushdown result is not a final count value. `count` default rows are emitted
+    // so the upper COUNT(*) aggregate can count them and produce the final result, including
+    // zero rows when count is 0.
+    //
+    // MIN/MAX: two rows are emitted per projected column, min first and max second; the upper
+    // MIN/MAX aggregate consumes those two rows to produce the final value. Returns
+    // NotSupported when the reader did not provide both a min and a max for a column.
+    Status _materialize_aggregate_pushdown_rows(TPushAggOp::type agg_type,
+                                                const FileAggregateResult& file_result,
+                                                Block* block) {
+        if (agg_type == TPushAggOp::type::COUNT) {
+            DORIS_CHECK(file_result.count >= 0);
+            return _materialize_count_rows(cast_set<size_t>(file_result.count), block);
+        }
+
+        const auto& mappings = _data_reader.column_mapper->mappings();
+        const auto& layout = _data_reader.file_block_layout;
+        DORIS_CHECK(file_result.columns.size() == mappings.size());
+        DORIS_CHECK(block->columns() == mappings.size());
+
+        // Start from an empty file block that follows the reader's block layout.
+        Block file_block;
+        file_block.reserve(layout.size());
+        for (const auto& layout_column : layout) {
+            file_block.insert(
+                    {layout_column.type->create_column(), layout_column.type, layout_column.name});
+        }
+
+        // Store the min/max pair of each aggregated column at that column's layout position.
+        for (size_t result_idx = 0; result_idx < file_result.columns.size(); ++result_idx) {
+            const auto& aggregate = file_result.columns[result_idx];
+            if (!(aggregate.has_min && aggregate.has_max)) {
+                return Status::NotSupported("Missing min/max aggregate result for column {}",
+                                            _projected_columns[result_idx].name);
+            }
+            const auto layout_it = std::ranges::find_if(layout, [&](const auto& candidate) {
+                return candidate.file_column_id == aggregate.projection.column_id();
+            });
+            DORIS_CHECK(layout_it != layout.end());
+            const auto block_position =
+                    static_cast<size_t>(std::distance(layout.begin(), layout_it));
+            auto min_max_column = file_block.get_by_position(block_position)
+                                          .type->create_column()
+                                          ->assert_mutable();
+            RETURN_IF_ERROR(_insert_aggregate_projection_value(
+                    aggregate.projection, aggregate.min_value, min_max_column.get()));
+            RETURN_IF_ERROR(_insert_aggregate_projection_value(
+                    aggregate.projection, aggregate.max_value, min_max_column.get()));
+            file_block.replace_by_position(block_position, std::move(min_max_column));
+        }
+
+        // Map the two-row file block to table columns.
+        for (size_t column_idx = 0; column_idx < mappings.size(); ++column_idx) {
+            ColumnPtr table_column;
+            RETURN_IF_ERROR(_materialize_mapping_column(mappings[column_idx], &file_block, 2,
+                                                        &table_column));
+            block->replace_by_position(column_idx, std::move(table_column));
+        }
+        return Status::OK();
+    }
+
+    // Describes one column of the file block layout (see DataReader::file_block_layout).
+    struct FileBlockColumn {
+        // Id of the file column held at this position; compared against the aggregate result's
+        // projection column id during min/max pushdown.
+        LocalColumnId file_column_id = LocalColumnId::invalid();
+        // Column name used when the column is inserted into a file block.
+        std::string name;
+        // Data type used to create the column at this position.
+        DataTypePtr type;
+    };
+
+    // Bundles the state that belongs to the data file currently being read.
+    struct DataReader {
+        // Reader for the data file, owned by this struct.
+        std::unique_ptr<FileReader> reader;
+        // Provides the column mappings (mappings()) used to turn file columns into table
+        // columns.
+        std::unique_ptr<TableColumnMapper> column_mapper;
+        // Schema of the data file, also including virtual column (row position).
+        std::vector<ColumnDefinition> file_schema;
+        // Layout of the block returned by file reader, determined by column mapping and file
+        // schema. It is used for file reader to materialize columns into correct type and position.
+        std::vector<FileBlockColumn> file_block_layout;
+        // NOTE(review): presumably an empty block following file_block_layout that is cloned for
+        // each read - confirm against the code that fills it.
+        Block block_template;
+    };
+    // Reader, column mapper and block layout of the data file currently being read.
+    DataReader _data_reader;
+    // Projected columns; their names are used in aggregate-pushdown error messages.
+    std::vector<ColumnDefinition> _projected_columns;
+    std::unique_ptr<ScanTask> _current_task;
+    std::optional<io::FileDescription> _current_file_description;
+    // Range-level compression has higher priority than scan-param compression. TVF/load can keep
+    // the logical format as CSV/TEXT while carrying the concrete compression such as GZ or LZO on
+    // each TFileRangeDesc, matching the old FileScanner reader contract.
+    TFileCompressType::type _current_range_compress_type = TFileCompressType::UNKNOWN;
+    std::optional<TUniqueId> _current_range_load_id;
+    TFileRangeDesc _current_file_range_desc;
+    std::shared_ptr<io::FileSystemProperties> _system_properties;
+    // partition key -> value
+    std::map<std::string, Field> _partition_values;
+    // Predicates built from scan conjuncts before file-level localization.
+    std::vector<TableFilter> _table_filters;
+    TableColumnPredicates _table_column_predicates;
+    VExprContextSPtrs _conjuncts;
+    ReadProfile _profile;
+    // Parsed from row-position based delete files, including position delete and deletion vector.
+    DeleteRows* _delete_rows = nullptr;
+    // The raw pointers below default to nullptr, like _delete_rows and _file_slot_descs, so an
+    // instance never carries an indeterminate pointer before they are assigned.
+    TFileScanRangeParams* _scan_params = nullptr;
+    std::shared_ptr<io::IOContext> _io_ctx;
+    // Passed to prepare()/open() of the mapping and cast expressions.
+    RuntimeState* _runtime_state = nullptr;
+    RuntimeProfile* _scanner_profile = nullptr;
+    const std::vector<SlotDescriptor*>* _file_slot_descs = nullptr;
+    // NOTE(review): no default member initializer here - confirm the constructor always sets it.
+    FileFormat _format;
+    TPushAggOp::type _push_down_agg_type = TPushAggOp::type::NONE;
+    size_t _batch_size = 0;
+    uint64_t _condition_cache_digest = 0;
+    segment_v2::ConditionCache::ExternalCacheKey _condition_cache_key;
+    std::shared_ptr<std::vector<bool>> _condition_cache;
+    std::shared_ptr<ConditionCacheContext> _condition_cache_ctx;
+    int64_t _condition_cache_hit_count = 0;
+    bool _current_reader_reached_eof = false;
+    int64_t _remaining_table_level_count = -1;
+    std::optional<GlobalRowIdContext> _global_rowid_context;
+    bool _aggregate_pushdown_tried = false;
+    TableColumnMapperOptions _mapper_options;
+
+private:
+    // Returns the first definition in `schema` whose file_local_id() equals the value of
+    // `column_id`, or nullptr when there is none. The pointer refers into `schema`.
+    static const ColumnDefinition* _find_column_definition(
+            const std::vector<ColumnDefinition>& schema, LocalColumnId column_id) {
+        const auto match = std::ranges::find_if(schema, [&](const ColumnDefinition& field) {
+            return field.file_local_id() == column_id.value();
+        });
+        return match == schema.end() ? nullptr : &*match;
+    }
+
+    // Tells whether MIN/MAX can be pushed down to the file reader for `mapping`.
+    // A mapping without child mappings always qualifies. A mapping with child mappings
+    // qualifies only when its file type is a STRUCT, exactly one child is mapped to a file
+    // column, and that child qualifies recursively.
+    static bool _can_push_down_minmax_for_mapping(const ColumnMapping& mapping) {
+        if (mapping.child_mappings.empty()) {
+            return true;
+        }
+        // The recursion reaches child mappings whose file_type has not been validated by the
+        // caller; treat a missing type as "cannot push down" instead of dereferencing null.
+        if (mapping.file_type == nullptr) {
+            return false;
+        }
+        if (remove_nullable(mapping.file_type)->get_primitive_type() != TYPE_STRUCT) {
+            return false;
+        }
+        const ColumnMapping* only_mapped_child = nullptr;
+        for (const auto& child_mapping : mapping.child_mappings) {
+            if (!child_mapping.file_local_id.has_value()) {
+                continue;
+            }
+            if (only_mapped_child != nullptr) {
+                // A second mapped child already rules the pushdown out.
+                return false;
+            }
+            only_mapped_child = &child_mapping;
+        }
+        return only_mapped_child != nullptr &&
+               _can_push_down_minmax_for_mapping(*only_mapped_child);
+    }
+
+    // Builds the aggregate projection for `mapping` into `*projection`.
+    // A mapping without child mappings projects all children. Otherwise the projection gets
+    // one child projection per child mapping that has a file_local_id, built recursively, and
+    // exactly one such child is required.
+    static Status build_aggregate_projection(const ColumnMapping& mapping,
+                                             LocalColumnIndex* projection) {
+        DORIS_CHECK(projection != nullptr);
+        DORIS_CHECK(mapping.file_local_id.has_value());
+        *projection = LocalColumnIndex::local(*mapping.file_local_id);
+        projection->children.clear();
+        const bool is_leaf = mapping.child_mappings.empty();
+        projection->project_all_children = is_leaf;
+        if (is_leaf) {
+            return Status::OK();
+        }
+        for (const auto& child_mapping : mapping.child_mappings) {
+            if (child_mapping.file_local_id.has_value()) {
+                LocalColumnIndex child_projection;
+                RETURN_IF_ERROR(build_aggregate_projection(child_mapping, &child_projection));
+                projection->children.push_back(std::move(child_projection));
+            }
+        }
+        DORIS_CHECK(projection->children.size() == 1);
+        return Status::OK();
+    }
+
+    // Appends one aggregate `value` to `column`, following `projection` down to the leaf.
+    // A nullable column receives the value in its nested column plus a 0 null-map entry.
+    // A projection that projects all children, or has none, inserts the value directly.
+    // Otherwise `column` must be a struct with exactly one field, matching the single child of
+    // the projection, and the value is inserted into that field.
+    static Status _insert_aggregate_projection_value(const LocalColumnIndex& projection,
+                                                     const Field& value, IColumn* column) {
+        DORIS_CHECK(column != nullptr);
+        if (auto* as_nullable = check_and_get_column<ColumnNullable>(*column)) {
+            RETURN_IF_ERROR(_insert_aggregate_projection_value(
+                    projection, value, &as_nullable->get_nested_column()));
+            as_nullable->get_null_map_data().push_back(0);
+            return Status::OK();
+        }
+        const bool is_leaf = projection.project_all_children || projection.children.empty();
+        if (is_leaf) {
+            column->insert(value);
+            return Status::OK();
+        }
+        auto* as_struct = assert_cast<ColumnStruct*>(column);
+        DORIS_CHECK(projection.children.size() == 1);
+        DORIS_CHECK(as_struct->get_columns().size() == 1);
+        return _insert_aggregate_projection_value(projection.children[0], value,
+                                                  &as_struct->get_column(0));
+    }
+
+    // Parse row-position deletes from table format specific parameters, and fill in _delete_rows.
+    Status _parse_delete_predicates(const SplitReadOptions& options);
+};
+
+} // namespace doris::format
diff --git a/be/src/io/file_factory.cpp b/be/src/io/file_factory.cpp
index 553cdc4460e15c..9610bc028595ec 100644
--- a/be/src/io/file_factory.cpp
+++ b/be/src/io/file_factory.cpp
@@ -57,21 +57,20 @@ namespace doris {
 
 constexpr std::string_view RANDOM_CACHE_BASE_PATH = "random";
 
-io::FileReaderOptions FileFactory::get_reader_options(RuntimeState* state,
+io::FileReaderOptions FileFactory::get_reader_options(const TQueryOptions& option,
                                                       const io::FileDescription& fd) {
     io::FileReaderOptions opts {
             .cache_base_path {},
             .file_size = fd.file_size,
             .mtime = fd.mtime,
     };
-    if (config::enable_file_cache && state != nullptr &&
-        state->query_options().__isset.enable_file_cache &&
-        state->query_options().enable_file_cache && fd.file_cache_admission) {
+    if (config::enable_file_cache && option.__isset.enable_file_cache && option.enable_file_cache &&
+        fd.file_cache_admission) {
         opts.cache_type = io::FileCachePolicy::FILE_BLOCK_CACHE;
     }
-    if (state != nullptr && state->query_options().__isset.file_cache_base_path &&
-        state->query_options().file_cache_base_path != RANDOM_CACHE_BASE_PATH) {
-        opts.cache_base_path = state->query_options().file_cache_base_path;
+    if (option.__isset.file_cache_base_path &&
+        option.file_cache_base_path != RANDOM_CACHE_BASE_PATH) {
+        opts.cache_base_path = option.file_cache_base_path;
     }
     return opts;
 }
diff --git a/be/src/io/file_factory.h b/be/src/io/file_factory.h
index 7d662e4fdde469..33595313b921b1 100644
--- a/be/src/io/file_factory.h
+++ b/be/src/io/file_factory.h
@@ -16,6 +16,7 @@
 // under the License.
 #pragma once
 
+#include <gen_cpp/PaloInternalService_types.h>
 #include <gen_cpp/PlanNodes_types.h>
 #include <gen_cpp/Types_types.h>
 #include <glog/logging.h>
@@ -64,6 +65,8 @@ struct FileDescription {
     // -1 means unset.
     // If the file length is not set, the file length will be fetched from the file system.
     int64_t file_size = -1;
+    int64_t range_start_offset = 0;
+    int64_t range_size = -1;
     // modification time of this file.
     // 0 means unset.
     int64_t mtime = 0;
@@ -83,7 +86,7 @@ class FileFactory {
     ENABLE_FACTORY_CREATOR(FileFactory);
 
 public:
-    static io::FileReaderOptions get_reader_options(RuntimeState* state,
+    static io::FileReaderOptions get_reader_options(const TQueryOptions& option,
                                                     const io::FileDescription& fd);
 
     /// Create a temporary FileSystem for accessing file corresponding to `file_description`
diff --git a/be/src/io/io_common.h b/be/src/io/io_common.h
index 36b20517afb87c..566e376219efab 100644
--- a/be/src/io/io_common.h
+++ b/be/src/io/io_common.h
@@ -97,6 +97,10 @@ struct IOContext {
     // if `is_warmup` == true, this I/O request is from a warm up task
     bool is_warmup {false};
     int64_t condition_cache_filtered_rows = 0;
+    // Rows removed by file-local predicate conjuncts inside FileReader/TableReader. Scanner-level
+    // output filtering already records its own unselected rows; this counter carries the rows that
+    // were filtered before the block returned to Scanner.
+    int64_t predicate_filtered_rows = 0;
 };
 
 } // namespace io
diff --git a/be/src/storage/segment/condition_cache.h b/be/src/storage/segment/condition_cache.h
index 511b9c56abac5e..a189312ee1427a 100644
--- a/be/src/storage/segment/condition_cache.h
+++ b/be/src/storage/segment/condition_cache.h
@@ -26,6 +26,7 @@
 #include <memory>
 #include <roaring/roaring.hh>
 #include <string>
+#include <vector>
 
 #include "common/config.h"
 #include "common/status.h"
@@ -38,7 +39,19 @@
 #include "util/slice.h"
 #include "util/time.h"
 
-namespace doris::segment_v2 {
+namespace doris {
+
+// Context passed from scan/table-reader layers to physical readers for condition cache
+// integration. On MISS, readers set filter_result[granule] to true when row-level predicates keep
+// at least one row in that granule. On HIT, readers skip granules whose cached bit is false.
+struct ConditionCacheContext {
+    bool is_hit = false;                              // true on cache HIT, false on MISS
+    std::shared_ptr<std::vector<bool>> filter_result; // per-granule: true = has surviving rows
+    int64_t base_granule = 0;                         // global granule index of filter_result[0]
+    static constexpr int GRANULE_SIZE = 2048;         // presumably rows per granule; confirm
+};
+
+namespace segment_v2 {
 
 class ConditionCacheHandle;
 
@@ -167,4 +180,5 @@ class ConditionCacheHandle {
     DISALLOW_COPY_AND_ASSIGN(ConditionCacheHandle);
 };
 
-} // namespace doris::segment_v2
+} // namespace segment_v2
+} // namespace doris
diff --git a/be/src/util/jni-util.h b/be/src/util/jni-util.h
index b230ac67f4778e..de9030b5b3a7c7 100644
--- a/be/src/util/jni-util.h
+++ b/be/src/util/jni-util.h
@@ -606,6 +606,14 @@ class Object {
 
     bool uninitialized() const { return _obj == nullptr; }
 
+    // Destroys the held reference, if any, and returns this object to the uninitialized state.
+    void reset(JNIEnv* env) {
+        if (_obj != nullptr) {
+            RefHelper<Ref>::destroy(env, _obj);
+            _obj = nullptr;
+        }
+    }
+
     template <RefType T>
     bool equal(JNIEnv* env, const Object<T>& other) {
         DCHECK(!uninitialized());
diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt
index 2edcff5eef87c9..95d2a435d8d00e 100644
--- a/be/test/CMakeLists.txt
+++ b/be/test/CMakeLists.txt
@@ -30,6 +30,7 @@ file(GLOB_RECURSE UT_FILES CONFIGURE_DEPENDS
     exec/*.cpp
     exprs/*.cpp
     format/*.cpp
+    format_v2/*.cpp
     gutil/*.cpp
     io/*.cpp
     load/*.cpp
diff --git a/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp b/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp
new file mode 100644
index 00000000000000..69cf458e2fdc5f
--- /dev/null
+++ b/be/test/core/data_type_serde/data_type_serde_decoded_values_test.cpp
@@ -0,0 +1,1852 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cctz/time_zone.h>
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/column/column_decimal.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_date_or_datetime_v2.h"
+#include "core/data_type/data_type_decimal.h"
+#include "core/data_type/data_type_nothing.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_time.h"
+#include "core/data_type/data_type_timestamptz.h"
+#include "core/data_type_serde/decoded_column_view.h"
+#include "core/field.h"
+#include "core/string_ref.h"
+#include "core/value/timestamptz_value.h"
+#include "util/timezone_utils.h"
+
+namespace doris {
+namespace {
+
+struct ReadColumnResult {
+    Status status;
+    MutableColumnPtr column;
+};
+
+template <typename T>
+DecodedColumnView make_fixed_view(DecodedValueKind kind, const std::vector<T>& values,
+                                  const std::vector<uint8_t>* null_map = nullptr) {
+    // A supplied null map defines the row count; empty buffers are exposed as null pointers.
+    DecodedColumnView view;
+    view.value_kind = kind;
+    view.row_count = static_cast<int64_t>(null_map != nullptr ? null_map->size() : values.size());
+    view.values = values.empty() ? nullptr : reinterpret_cast<const uint8_t*>(values.data());
+    view.null_map = (null_map != nullptr && !null_map->empty()) ? null_map->data() : nullptr;
+    return view;
+}
+
+DecodedColumnView make_binary_view(DecodedValueKind kind, const std::vector<StringRef>& values,
+                                   int fixed_length = -1,
+                                   const std::vector<uint8_t>* null_map = nullptr) {
+    // A supplied null map defines the row count; empty inputs are exposed as null pointers.
+    DecodedColumnView view;
+    view.value_kind = kind;
+    view.fixed_length = fixed_length;
+    view.row_count = static_cast<int64_t>(null_map != nullptr ? null_map->size() : values.size());
+    view.binary_values = values.empty() ? nullptr : &values;
+    view.null_map = (null_map != nullptr && !null_map->empty()) ? null_map->data() : nullptr;
+    return view;
+}
+
+DecodedColumnView make_bool_view(const std::vector<uint8_t>& values,
+                                 const std::vector<uint8_t>* null_map = nullptr) {
+    // Booleans are passed as one uint8_t each; a supplied null map defines the row count.
+    DecodedColumnView view;
+    view.value_kind = DecodedValueKind::BOOL;
+    view.row_count = static_cast<int64_t>(null_map != nullptr ? null_map->size() : values.size());
+    view.values = values.empty() ? nullptr : values.data();
+    view.null_map = (null_map != nullptr && !null_map->empty()) ? null_map->data() : nullptr;
+    return view;
+}
+
+DecodedColumnView with_logical_integer(DecodedColumnView view, int bit_width, bool is_signed) {
+    view.logical_integer_bit_width = bit_width;
+    view.logical_integer_is_signed = is_signed;
+    return view;
+}
+
+ReadColumnResult read_column(const DataTypePtr& type, const DecodedColumnView& view) {
+    ReadColumnResult result {.status = Status::OK(), .column = type->create_column()};
+    result.status = type->get_serde()->read_column_from_decoded_values(*result.column, view);
+    return result;
+}
+
+void expect_not_supported(const Status& status) {
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(ErrorCode::NOT_IMPLEMENTED_ERROR, status.code()) << status;
+}
+
+void expect_corruption(const Status& status) {
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(ErrorCode::CORRUPTION, status.code()) << status;
+}
+
+void expect_data_quality_error(const Status& status) {
+    EXPECT_FALSE(status.ok());
+    EXPECT_EQ(ErrorCode::DATA_QUALITY_ERROR, status.code()) << status;
+}
+
+void expect_column_strings(const IDataType& type, const IColumn& column,
+                           const std::vector<std::string>& expected) {
+    ASSERT_EQ(expected.size(), column.size());
+    for (size_t idx = 0, rows = expected.size(); idx < rows; ++idx) {
+        EXPECT_EQ(expected[idx], type.to_string(column, idx)) << "row=" << idx;
+    }
+}
+
+void expect_binary_column(const IColumn& column, const std::vector<std::string>& expected) {
+    const auto& strings = assert_cast<const ColumnString&>(column);
+    ASSERT_EQ(expected.size(), strings.size());
+    for (size_t idx = 0, rows = expected.size(); idx < rows; ++idx) {
+        const auto cell = strings.get_data_at(idx);
+        EXPECT_EQ(expected[idx], std::string(cell.data, cell.size)) << "row=" << idx;
+    }
+}
+
+void expect_nullable_all_null(const IColumn& column, size_t expected_size) {
+    const auto& nullable = assert_cast<const ColumnNullable&>(column);
+    ASSERT_EQ(expected_size, nullable.size());
+    ASSERT_EQ(expected_size, nullable.get_nested_column().size());
+    for (size_t idx = 0; idx != expected_size; ++idx) {
+        EXPECT_TRUE(nullable.is_null_at(idx)) << "row=" << idx;
+    }
+}
+
+Field read_field(const DataTypePtr& type, const DecodedColumnView& view) {
+    Field decoded;
+    const Status st = type->get_serde()->read_field_from_decoded_value(*type, &decoded, view);
+    EXPECT_TRUE(st.ok()) << st;
+    return decoded;
+}
+
+Status read_field_status(const DataTypePtr& type, const DecodedColumnView& view) {
+    Field field;
+    return type->get_serde()->read_field_from_decoded_value(*type, &field, view);
+}
+
+std::vector<StringRef> string_refs(const std::vector<std::string>& values) {
+    std::vector<StringRef> borrowed;
+    borrowed.reserve(values.size());
+    for (size_t idx = 0; idx < values.size(); ++idx) {
+        borrowed.emplace_back(values[idx].data(), values[idx].size());
+    }
+    return borrowed;
+}
+
+#pragma pack(push, 1) // byte-packed so the struct is 12 bytes with no tail padding
+struct TestInt96Timestamp {
+    int64_t nanos_of_day;
+    int32_t julian_day;
+};
+#pragma pack(pop) // restore the previously active packing rather than the compiler default
+
+static_assert(sizeof(TestInt96Timestamp) == 12);
+
+Decimal128V3 decimal128_v3(Int128 value) {
+    return Decimal128V3(value);
+}
+
+Decimal256 decimal256_from_int64(int64_t value) {
+    return Decimal256(wide::Int256(value));
+}
+
+} // namespace
+
+// ----------------------------------------------------------------------
+// Base SerDe behavior
+// ----------------------------------------------------------------------
+// These cases define the default contract for types that have not implemented decoded-value
+// materialization. Batch reads must report NotSupported, and the single-field path must surface
+// the same error because it is implemented by delegating to the batch reader.
+
+TEST(DataTypeSerDeDecodedValuesTest, BaseSerdeRejectsDecodedValues) {
+    // DataTypeNothing stands in for a type without decoded-value support.
+    const std::vector<int32_t> one_int = {1};
+    const DataTypePtr nothing_type = std::make_shared<DataTypeNothing>();
+
+    auto outcome = read_column(nothing_type, make_fixed_view(DecodedValueKind::INT32, one_int));
+
+    expect_not_supported(outcome.status);
+    EXPECT_EQ(0, outcome.column->size());
+    EXPECT_NE(std::string::npos, outcome.status.to_string().find("Nothing"));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, BaseFieldUsesBatchReaderAndPropagatesError) {
+    const auto type = std::make_shared<DataTypeNothing>();
+    const std::vector<int32_t> one_int = {1};
+    const auto view = make_fixed_view(DecodedValueKind::INT32, one_int);
+    // Pre-fill the output with a sentinel to prove that a failed read leaves it untouched.
+    Field sentinel = Field::create_field<TYPE_INT>(123);
+    const auto status = type->get_serde()->read_field_from_decoded_value(*type, &sentinel, view);
+
+    expect_not_supported(status);
+    EXPECT_EQ(TYPE_INT, sentinel.get_type());
+    EXPECT_EQ(123, sentinel.get<TYPE_INT>());
+}
+
+// ----------------------------------------------------------------------
+// Number SerDe happy path
+// ----------------------------------------------------------------------
+// The numeric matrix verifies physical kind dispatch and the exact static_cast behavior used by
+// the reader. Narrow integer overflow is intentionally locked to current C++ conversion behavior;
+// if product semantics change to reject overflow, these expectations should be updated with the
+// implementation change.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadBooleanFromBool) {
+    // BOOL batches carry one uint8_t per row; each must land unchanged in ColumnBool.
+    const std::vector<uint8_t> source = {true, false, true};
+    const auto view = make_bool_view(source);
+
+    auto read = read_column(std::make_shared<DataTypeBool>(), view);
+
+    ASSERT_TRUE(read.status.ok()) << read.status;
+    const auto& bools = assert_cast<const ColumnBool&>(*read.column);
+    ASSERT_EQ(3, bools.size());
+    EXPECT_EQ(1, bools.get_element(0));
+    EXPECT_EQ(0, bools.get_element(1));
+    EXPECT_EQ(1, bools.get_element(2));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadSignedIntegersFromInt32) {
+    // All source values fit in int8, so every signed target width must reproduce them exactly:
+    // the expectation for each row is simply the source value cast to the target type.
+    const std::vector<int32_t> source = {0, 1, -1, 127, -128};
+    const auto view = make_fixed_view(DecodedValueKind::INT32, source);
+
+    {
+        auto read = read_column(std::make_shared<DataTypeInt8>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& tinyints = assert_cast<const ColumnInt8&>(*read.column);
+        ASSERT_EQ(source.size(), tinyints.size());
+        for (size_t i = 0; i < source.size(); ++i) {
+            EXPECT_EQ(static_cast<int8_t>(source[i]), tinyints.get_element(i));
+        }
+    }
+    {
+        auto read = read_column(std::make_shared<DataTypeInt16>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& smallints = assert_cast<const ColumnInt16&>(*read.column);
+        ASSERT_EQ(source.size(), smallints.size());
+        for (size_t i = 0; i < source.size(); ++i) {
+            EXPECT_EQ(static_cast<int16_t>(source[i]), smallints.get_element(i));
+        }
+    }
+    {
+        auto read = read_column(std::make_shared<DataTypeInt32>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& ints = assert_cast<const ColumnInt32&>(*read.column);
+        ASSERT_EQ(source.size(), ints.size());
+        for (size_t i = 0; i < source.size(); ++i) {
+            EXPECT_EQ(source[i], ints.get_element(i));
+        }
+    }
+    {
+        auto read = read_column(std::make_shared<DataTypeInt64>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& bigints = assert_cast<const ColumnInt64&>(*read.column);
+        ASSERT_EQ(source.size(), bigints.size());
+        for (size_t i = 0; i < source.size(); ++i) {
+            EXPECT_EQ(static_cast<int64_t>(source[i]), bigints.get_element(i));
+        }
+    }
+    {
+        auto read = read_column(std::make_shared<DataTypeInt128>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& largeints = assert_cast<const ColumnInt128&>(*read.column);
+        ASSERT_EQ(source.size(), largeints.size());
+        for (size_t i = 0; i < source.size(); ++i) {
+            EXPECT_EQ(static_cast<__int128_t>(source[i]), largeints.get_element(i));
+        }
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadSignedIntegersFromInt64) {
+    const std::vector<int64_t> source = {0, 1, -1, 127, -128};
+    const auto view = make_fixed_view(DecodedValueKind::INT64, source);
+
+    auto as_int8 = read_column(std::make_shared<DataTypeInt8>(), view);
+    ASSERT_TRUE(as_int8.status.ok()) << as_int8.status;
+    const auto& int8_column = assert_cast<const ColumnInt8&>(*as_int8.column);
+    EXPECT_EQ(127, int8_column.get_element(3));
+    EXPECT_EQ(-128, int8_column.get_element(4));
+
+    auto as_int16 = read_column(std::make_shared<DataTypeInt16>(), view);
+    ASSERT_TRUE(as_int16.status.ok()) << as_int16.status;
+    const auto& int16_column = assert_cast<const ColumnInt16&>(*as_int16.column);
+    EXPECT_EQ(127, int16_column.get_element(3));
+    EXPECT_EQ(-128, int16_column.get_element(4));
+
+    auto as_int32 = read_column(std::make_shared<DataTypeInt32>(), view);
+    ASSERT_TRUE(as_int32.status.ok()) << as_int32.status;
+    const auto& int32_column = assert_cast<const ColumnInt32&>(*as_int32.column);
+    EXPECT_EQ(127, int32_column.get_element(3));
+    EXPECT_EQ(-128, int32_column.get_element(4));
+
+    auto as_int64 = read_column(std::make_shared<DataTypeInt64>(), view);
+    ASSERT_TRUE(as_int64.status.ok()) << as_int64.status;
+    const auto& int64_column = assert_cast<const ColumnInt64&>(*as_int64.column);
+    ASSERT_EQ(source.size(), int64_column.size());
+    for (size_t i = 0; i < source.size(); ++i) {
+        EXPECT_EQ(source[i], int64_column.get_element(i));
+    }
+
+    auto as_int128 = read_column(std::make_shared<DataTypeInt128>(), view);
+    ASSERT_TRUE(as_int128.status.ok()) << as_int128.status;
+    const auto& int128_column = assert_cast<const ColumnInt128&>(*as_int128.column);
+    ASSERT_EQ(source.size(), int128_column.size());
+    for (size_t i = 0; i < source.size(); ++i) {
+        EXPECT_EQ(static_cast<__int128_t>(source[i]), int128_column.get_element(i));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadIntegersFromUnsignedSources) {
+    {
+        // Every UINT32 value, including its maximum, fits in a signed 64-bit target.
+        std::vector<uint32_t> source = {0, 1, std::numeric_limits<uint32_t>::max()};
+        auto view = make_fixed_view(DecodedValueKind::UINT32, source);
+        auto read = read_column(std::make_shared<DataTypeInt64>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& widened = assert_cast<const ColumnInt64&>(*read.column);
+        EXPECT_EQ(0, widened.get_element(0));
+        EXPECT_EQ(1, widened.get_element(1));
+        EXPECT_EQ(4294967295LL, widened.get_element(2));
+    }
+    {
+        std::vector<uint64_t> source = {0, 1, std::numeric_limits<uint64_t>::max()};
+        auto view = make_fixed_view(DecodedValueKind::UINT64, source);
+        auto read = read_column(std::make_shared<DataTypeInt128>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& widened = assert_cast<const ColumnInt128&>(*read.column);
+        EXPECT_EQ(0, widened.get_element(0));
+        EXPECT_EQ(1, widened.get_element(1));
+        EXPECT_EQ(static_cast<__int128_t>(std::numeric_limits<uint64_t>::max()),
+                  widened.get_element(2));
+    }
+    {
+        std::vector<uint64_t> source = {static_cast<uint64_t>(std::numeric_limits<int64_t>::max())};
+        auto view = make_fixed_view(DecodedValueKind::UINT64, source);
+        auto read = read_column(std::make_shared<DataTypeInt64>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& same_width = assert_cast<const ColumnInt64&>(*read.column);
+        EXPECT_EQ(std::numeric_limits<int64_t>::max(), same_width.get_element(0));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadUnsignedLogicalIntegersCastsPhysicalValues) {
+    {
+        // Unsigned 8-bit logical type: only the low byte of each physical INT32 is kept.
+        std::vector<int32_t> raw = {0, 127, 255, 32767, 65535, -1};
+        auto view = with_logical_integer(make_fixed_view(DecodedValueKind::INT32, raw), 8, false);
+        auto read = read_column(std::make_shared<DataTypeInt16>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& decoded = assert_cast<const ColumnInt16&>(*read.column);
+        ASSERT_EQ(raw.size(), decoded.size());
+        EXPECT_EQ(0, decoded.get_element(0));
+        EXPECT_EQ(127, decoded.get_element(1));
+        EXPECT_EQ(255, decoded.get_element(2));
+        EXPECT_EQ(255, decoded.get_element(3));
+        EXPECT_EQ(255, decoded.get_element(4));
+        EXPECT_EQ(255, decoded.get_element(5));
+    }
+    {
+        // Unsigned 16-bit logical type: only the low 16 bits of each physical INT32 are kept.
+        std::vector<int32_t> raw = {32767, 65535, -1};
+        auto view = with_logical_integer(make_fixed_view(DecodedValueKind::INT32, raw), 16, false);
+        auto read = read_column(std::make_shared<DataTypeInt32>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& decoded = assert_cast<const ColumnInt32&>(*read.column);
+        ASSERT_EQ(raw.size(), decoded.size());
+        EXPECT_EQ(32767, decoded.get_element(0));
+        EXPECT_EQ(65535, decoded.get_element(1));
+        EXPECT_EQ(65535, decoded.get_element(2));
+    }
+    {
+        std::vector<int32_t> raw = {-1};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::UINT32, raw), 32, false);
+        auto read = read_column(std::make_shared<DataTypeInt64>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& decoded = assert_cast<const ColumnInt64&>(*read.column);
+        ASSERT_EQ(1, decoded.size());
+        EXPECT_EQ(4294967295LL, decoded.get_element(0));
+    }
+    {
+        std::vector<int64_t> raw = {-1};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::UINT64, raw), 64, false);
+        auto read = read_column(std::make_shared<DataTypeInt128>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& decoded = assert_cast<const ColumnInt128&>(*read.column);
+        ASSERT_EQ(1, decoded.size());
+        EXPECT_EQ(static_cast<__int128_t>(std::numeric_limits<uint64_t>::max()),
+                  decoded.get_element(0));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadSignedLogicalIntegersCastsPhysicalValues) {
+    const std::vector<int32_t> raw = {127, 128, 255, -1};
+    const std::vector<Int8> expected = {127, -128, -1, -1};
+    auto view = with_logical_integer(make_fixed_view(DecodedValueKind::INT32, raw), 8, true);
+    auto read = read_column(std::make_shared<DataTypeInt8>(), view);
+    ASSERT_TRUE(read.status.ok()) << read.status;
+    const auto& narrowed = assert_cast<const ColumnInt8&>(*read.column);
+    ASSERT_EQ(raw.size(), narrowed.size());
+    for (size_t i = 0; i < expected.size(); ++i) {
+        EXPECT_EQ(expected[i], narrowed.get_element(i));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFloatAndDouble) {
+    {
+        const std::vector<float> source = {0.0F, -0.0F, 1.5F, -2.25F};
+        const auto view = make_fixed_view(DecodedValueKind::FLOAT, source);
+        auto read = read_column(std::make_shared<DataTypeFloat32>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& floats = assert_cast<const ColumnFloat32&>(*read.column);
+        EXPECT_FLOAT_EQ(0.0F, floats.get_element(0));
+        EXPECT_TRUE(std::signbit(floats.get_element(1)));
+        EXPECT_FLOAT_EQ(1.5F, floats.get_element(2));
+        EXPECT_FLOAT_EQ(-2.25F, floats.get_element(3));
+    }
+    {
+        const std::vector<double> source = {0.0, -0.0, 1.5, -2.25};
+        const auto view = make_fixed_view(DecodedValueKind::DOUBLE, source);
+        auto read = read_column(std::make_shared<DataTypeFloat64>(), view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& doubles = assert_cast<const ColumnFloat64&>(*read.column);
+        EXPECT_DOUBLE_EQ(0.0, doubles.get_element(0));
+        EXPECT_TRUE(std::signbit(doubles.get_element(1)));
+        EXPECT_DOUBLE_EQ(1.5, doubles.get_element(2));
+        EXPECT_DOUBLE_EQ(-2.25, doubles.get_element(3));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFloatSpecialValues) {
+    {
+        // NaN and both infinities must keep their class and sign in the FLOAT column.
+        const float inf = std::numeric_limits<float>::infinity();
+        const std::vector<float> specials = {std::numeric_limits<float>::quiet_NaN(), inf, -inf};
+        auto read = read_column(std::make_shared<DataTypeFloat32>(),
+                                make_fixed_view(DecodedValueKind::FLOAT, specials));
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& floats = assert_cast<const ColumnFloat32&>(*read.column);
+        EXPECT_TRUE(std::isnan(floats.get_element(0)));
+        EXPECT_TRUE(std::isinf(floats.get_element(1)));
+        EXPECT_FALSE(std::signbit(floats.get_element(1)));
+        EXPECT_TRUE(std::isinf(floats.get_element(2)));
+        EXPECT_TRUE(std::signbit(floats.get_element(2)));
+    }
+    {
+        // NaN and both infinities must keep their class and sign in the DOUBLE column.
+        const double inf = std::numeric_limits<double>::infinity();
+        const std::vector<double> specials = {std::numeric_limits<double>::quiet_NaN(), inf, -inf};
+        auto read = read_column(std::make_shared<DataTypeFloat64>(),
+                                make_fixed_view(DecodedValueKind::DOUBLE, specials));
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& doubles = assert_cast<const ColumnFloat64&>(*read.column);
+        EXPECT_TRUE(std::isnan(doubles.get_element(0)));
+        EXPECT_TRUE(std::isinf(doubles.get_element(1)));
+        EXPECT_FALSE(std::signbit(doubles.get_element(1)));
+        EXPECT_TRUE(std::isinf(doubles.get_element(2)));
+        EXPECT_TRUE(std::signbit(doubles.get_element(2)));
+    }
+}
+
+// ----------------------------------------------------------------------
+// Number SerDe error paths
+// ----------------------------------------------------------------------
+// These cases separate unsupported physical kinds from corrupt decoded buffers. Unsupported kinds
+// must not append to the destination column; missing value buffers are allowed only for empty or
+// all-null batches where no non-null row can dereference the buffer.
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsMismatchedKind) {
+    struct Mismatch {
+        DataTypePtr target_type;
+        DecodedValueKind source_kind;
+    };
+    const std::vector<Mismatch> mismatches = {
+            {std::make_shared<DataTypeBool>(), DecodedValueKind::INT32},
+            {std::make_shared<DataTypeInt32>(), DecodedValueKind::BOOL},
+            {std::make_shared<DataTypeFloat32>(), DecodedValueKind::DOUBLE},
+            {std::make_shared<DataTypeFloat64>(), DecodedValueKind::FLOAT},
+            {std::make_shared<DataTypeInt32>(), DecodedValueKind::BINARY},
+    };
+    const std::vector<int32_t> payload = {1};
+
+    for (const auto& [target_type, source_kind] : mismatches) {
+        auto read = read_column(target_type, make_fixed_view(source_kind, payload));
+        expect_not_supported(read.status);
+        EXPECT_EQ(0, read.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsMissingValuesWhenNonNullExists) {
+    const DataTypePtr int_type = std::make_shared<DataTypeInt32>();
+    // Three rows are declared, but no value buffer is attached to the view.
+    DecodedColumnView missing_values;
+    missing_values.value_kind = DecodedValueKind::INT32;
+    missing_values.row_count = 3;
+
+    {
+        // No null map is attached, so no row is marked null.
+        expect_corruption(read_column(int_type, missing_values).status);
+    }
+    {
+        // Row 1 is non-null, so its value would have to come from the missing buffer.
+        const std::vector<uint8_t> null_map = {1, 0, 1};
+        DecodedColumnView with_nulls = missing_values;
+        with_nulls.null_map = null_map.data();
+        expect_corruption(read_column(int_type, with_nulls).status);
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberAllowsMissingValuesForAllNullOrEmpty) {
+    const auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    {
+        DecodedColumnView empty_view;
+        empty_view.value_kind = DecodedValueKind::INT32;
+        empty_view.row_count = 0;
+        auto read = read_column(type, empty_view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        EXPECT_EQ(0, read.column->size());
+    }
+    {
+        // All three rows are null, so the reader never needs the absent value buffer.
+        const std::vector<uint8_t> all_null = {1, 1, 1};
+        DecodedColumnView null_view;
+        null_view.value_kind = DecodedValueKind::INT32;
+        null_view.row_count = 3;
+        null_view.null_map = all_null.data();
+        auto read = read_column(type, null_view);
+        ASSERT_TRUE(read.status.ok()) << read.status;
+        const auto& nullable = assert_cast<const ColumnNullable&>(*read.column);
+        const auto& nested = assert_cast<const ColumnInt32&>(nullable.get_nested_column());
+        ASSERT_EQ(3, nullable.size());
+        for (size_t i = 0; i < nullable.size(); ++i) {
+            EXPECT_TRUE(nullable.is_null_at(i));
+            EXPECT_EQ(0, nested.get_element(i));
+        }
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsOutOfRangeValueInStrictMode) {
+    const auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt8>());
+    const std::vector<int64_t> source = {127, 128};
+    const std::vector<uint8_t> not_null = {0, 0};
+    auto strict_view = make_fixed_view(DecodedValueKind::INT64, source, &not_null);
+    strict_view.enable_strict_mode = true;
+
+    auto read = read_column(type, strict_view);
+    // 128 overflows int8; the rejected batch must leave no partial rows in any sub-column.
+    expect_data_quality_error(read.status);
+    const auto& nullable = assert_cast<const ColumnNullable&>(*read.column);
+    EXPECT_EQ(0, nullable.size());
+    EXPECT_EQ(0, nullable.get_null_map_data().size());
+    EXPECT_EQ(0, nullable.get_nested_column().size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberNullsOutOfRangeValueInNonStrictMode) {
+    const auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt8>());
+    const std::vector<int64_t> source = {127, 128, -129, -128};
+    const std::vector<uint8_t> not_null = {0, 0, 0, 0};
+    const auto view = make_fixed_view(DecodedValueKind::INT64, source, &not_null);
+
+    auto read = read_column(type, view);
+
+    ASSERT_TRUE(read.status.ok()) << read.status;
+    const auto& nullable = assert_cast<const ColumnNullable&>(*read.column);
+    const auto& nested = assert_cast<const ColumnInt8&>(nullable.get_nested_column());
+    ASSERT_EQ(4, nullable.size());
+    // 128 and -129 fall outside int8: those rows become NULL with a zero placeholder, while
+    // the in-range boundary values 127 and -128 are stored as-is.
+    const std::vector<bool> expect_null = {false, true, true, false};
+    const std::vector<Int8> expect_value = {127, 0, 0, -128};
+    for (size_t i = 0; i < expect_null.size(); ++i) {
+        EXPECT_EQ(expect_null[i], nullable.is_null_at(i));
+        EXPECT_EQ(expect_value[i], nested.get_element(i));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberRejectsUnsignedOverflowInStrictMode) {
+    // Row 0 is INT64_MAX stored as unsigned; row 1 is UINT64_MAX, which Int64 cannot hold.
+    auto int64_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>());
+    std::vector<uint8_t> nulls = {0, 0};
+    std::vector<uint64_t> raw = {static_cast<uint64_t>(std::numeric_limits<int64_t>::max()),
+                                 std::numeric_limits<uint64_t>::max()};
+    auto decoded = make_fixed_view(DecodedValueKind::UINT64, raw, &nulls);
+    decoded.enable_strict_mode = true;
+    auto outcome = read_column(int64_type, decoded);
+    // Strict mode is expected to fail the read with a data-quality error.
+    expect_data_quality_error(outcome.status);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NumberNullsUnsignedOverflowInNonStrictMode) {
+    // Row 0 is the largest value Int64 can hold; row 1 (UINT64_MAX) overflows it.
+    constexpr auto kInt64Max = std::numeric_limits<int64_t>::max();
+    std::vector<uint64_t> raw = {static_cast<uint64_t>(kInt64Max),
+                                 std::numeric_limits<uint64_t>::max()};
+    std::vector<uint8_t> nulls = {0, 0};
+    auto int64_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>());
+    auto decoded = make_fixed_view(DecodedValueKind::UINT64, raw, &nulls);
+    auto outcome = read_column(int64_type, decoded);
+    // Non-strict mode is expected to keep row 0 and turn row 1 into NULL with a zero payload.
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& outer = assert_cast<const ColumnNullable&>(*outcome.column);
+    const auto& inner = assert_cast<const ColumnInt64&>(outer.get_nested_column());
+    ASSERT_EQ(2, outer.size());
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_EQ(kInt64Max, inner.get_element(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_EQ(0, inner.get_element(1));
+}
+
+// ----------------------------------------------------------------------
+// String / Binary SerDe
+// ----------------------------------------------------------------------
+// String-like decoded reads must preserve exact byte sequences. The embedded-NUL case prevents
+// accidental C-string truncation. Nullable string tests ensure null rows materialize default nested
+// values while the outer null map remains authoritative.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadStringFromBinary) {
+    // Covers a plain value, an empty value, an embedded NUL byte, and non-ASCII UTF-8 bytes.
+    std::vector<std::string> bytes = {"alpha", "", std::string("a\0b", 3), "utf8-\xe4\xb8\xad"};
+    auto slices = string_refs(bytes);
+    auto string_type = std::make_shared<DataTypeString>();
+    auto decoded = make_binary_view(DecodedValueKind::BINARY, slices);
+    auto outcome = read_column(string_type, decoded);
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_binary_column(*outcome.column, bytes);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadStringFromFixedBinary) {
+    // Two 4-byte values that include a NUL byte and bytes with the high bit set.
+    std::vector<std::string> bytes = {std::string("\x00\x01\x02\x03", 4),
+                                      std::string("\x7f\x80\xfe\xff", 4)};
+    auto slices = string_refs(bytes);
+    auto string_type = std::make_shared<DataTypeString>();
+    auto decoded = make_binary_view(DecodedValueKind::FIXED_BINARY, slices, 4);
+    auto outcome = read_column(string_type, decoded);
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_binary_column(*outcome.column, bytes);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, StringNullMapMaterialization) {
+    // Only the middle row is flagged null; its decoded slot holds an empty string.
+    std::vector<uint8_t> nulls = {0, 1, 0};
+    std::vector<std::string> bytes = {"alpha", "", "omega"};
+    auto slices = string_refs(bytes);
+    auto string_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    auto decoded = make_binary_view(DecodedValueKind::BINARY, slices, -1, &nulls);
+    auto outcome = read_column(string_type, decoded);
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& outer = assert_cast<const ColumnNullable&>(*outcome.column);
+    ASSERT_EQ(3, outer.size());
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    // The nested column is expected to hold one entry per row, including the null row.
+    expect_binary_column(outer.get_nested_column(), {"alpha", "", "omega"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, StringRejectsMismatchedKind) {
+    auto string_type = std::make_shared<DataTypeString>();
+    std::vector<int64_t> raw = {1}; // dummy payload shared by every rejected kind
+    for (auto kind : {DecodedValueKind::INT32, DecodedValueKind::INT64, DecodedValueKind::DOUBLE}) {
+        auto outcome = read_column(string_type, make_fixed_view(kind, raw));
+        expect_not_supported(outcome.status);
+        EXPECT_EQ(0, outcome.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, StringRejectsMissingBinaryValuesWhenNonNullExists) {
+    // Only the kind and row count are set; no value buffer or null map is attached here.
+    DecodedColumnView decoded;
+    decoded.row_count = 1;
+    decoded.value_kind = DecodedValueKind::BINARY;
+    auto string_type = std::make_shared<DataTypeString>();
+    auto outcome = read_column(string_type, decoded);
+    // The read is expected to be reported as corruption.
+    expect_corruption(outcome.status);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, StringAllowsAllNullWithoutBinaryValues) {
+    // Every row is null, so the view gets a null map but no binary values are attached.
+    std::vector<uint8_t> nulls = {1, 1};
+    DecodedColumnView decoded;
+    decoded.null_map = nulls.data();
+    decoded.row_count = 2;
+    decoded.value_kind = DecodedValueKind::BINARY;
+    auto string_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    auto outcome = read_column(string_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& outer = assert_cast<const ColumnNullable&>(*outcome.column);
+    ASSERT_EQ(2, outer.size());
+    EXPECT_TRUE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    expect_binary_column(outer.get_nested_column(), {"", ""});
+}
+
+// ----------------------------------------------------------------------
+// DateV2 SerDe
+// ----------------------------------------------------------------------
+// DateV2 accepts Parquet DATE-style epoch days as INT32. Null rows insert default nested dates and
+// missing buffers are rejected only when a non-null row requires a value.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateV2FromEpochDays) {
+    // Day offsets around the epoch, plus 2021-01-01 and the 2020 leap day.
+    std::vector<int32_t> epoch_days = {-1, 0, 1, 18628, 18321};
+    auto date_type = std::make_shared<DataTypeDateV2>();
+    auto decoded = make_fixed_view(DecodedValueKind::INT32, epoch_days);
+    auto outcome = read_column(date_type, decoded);
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*date_type, *outcome.column,
+                          {"1969-12-31", "1970-01-01", "1970-01-02", "2021-01-01", "2020-02-29"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateV2HandlesNulls) {
+    // Row 1 is flagged null, so it is expected to render as NULL rather than 1970-01-02.
+    std::vector<uint8_t> nulls = {0, 1, 0};
+    std::vector<int32_t> epoch_days = {0, 1, 2};
+    auto date_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateV2>());
+    auto decoded = make_fixed_view(DecodedValueKind::INT32, epoch_days, &nulls);
+    auto outcome = read_column(date_type, decoded);
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& outer = assert_cast<const ColumnNullable&>(*outcome.column);
+    ASSERT_EQ(3, outer.size());
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    expect_column_strings(*date_type, *outcome.column, {"1970-01-01", "NULL", "1970-01-03"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateV2RejectsInvalidKind) {
+    auto date_type = std::make_shared<DataTypeDateV2>();
+    std::vector<int64_t> raw = {0}; // dummy payload shared by every rejected kind
+    for (auto kind :
+         {DecodedValueKind::INT64, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) {
+        auto outcome = read_column(date_type, make_fixed_view(kind, raw));
+        expect_not_supported(outcome.status);
+        EXPECT_EQ(0, outcome.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateV2RejectsMissingValuesWhenNonNullExists) {
+    // Only the kind and row count are set; no value buffer or null map is attached here.
+    DecodedColumnView decoded;
+    decoded.row_count = 1;
+    decoded.value_kind = DecodedValueKind::INT32;
+    auto date_type = std::make_shared<DataTypeDateV2>();
+    auto outcome = read_column(date_type, decoded);
+    // The read is expected to be reported as corruption.
+    expect_corruption(outcome.status);
+}
+
+// ----------------------------------------------------------------------
+// DateTimeV2 SerDe
+// ----------------------------------------------------------------------
+// Timestamp decoding covers INT64 micros/millis/nanos, UNKNOWN-as-micros compatibility,
+// UTC-adjusted conversion with explicit/default timezones, INT96 Julian-day timestamps, and invalid
+// buffer/kind errors. Negative epoch values are included to lock correct floor-division behavior.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Micros) {
+    // Includes one microsecond before the epoch and the last microsecond of the first day.
+    std::vector<int64_t> micros = {-1, 0, 1, 1234567, 86400000000LL - 1};
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, micros);
+    decoded.time_unit = DecodedTimeUnit::MICROS;
+    auto outcome = read_column(dt_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*dt_type, *outcome.column,
+                          {"1969-12-31 23:59:59.999999", "1970-01-01 00:00:00.000000",
+                           "1970-01-01 00:00:00.000001", "1970-01-01 00:00:01.234567",
+                           "1970-01-01 23:59:59.999999"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Millis) {
+    // Millisecond inputs, including one millisecond before the epoch.
+    std::vector<int64_t> millis = {-1, 0, 1, 1234};
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, millis);
+    decoded.time_unit = DecodedTimeUnit::MILLIS;
+    auto outcome = read_column(dt_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*dt_type, *outcome.column,
+                          {"1969-12-31 23:59:59.999000", "1970-01-01 00:00:00.000000",
+                           "1970-01-01 00:00:00.001000", "1970-01-01 00:00:01.234000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Nanos) {
+    // Nanosecond inputs at scale 6; the last row's sub-microsecond digits are dropped.
+    std::vector<int64_t> nanos = {-1000, 0, 1000, 1234567890};
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, nanos);
+    decoded.time_unit = DecodedTimeUnit::NANOS;
+    auto outcome = read_column(dt_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*dt_type, *outcome.column,
+                          {"1969-12-31 23:59:59.999999", "1970-01-01 00:00:00.000000",
+                           "1970-01-01 00:00:00.000001", "1970-01-01 00:00:01.234567"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UnknownUnitAsMicros) {
+    // With an UNKNOWN unit, 1000000 is expected to decode as one second (i.e. as micros).
+    std::vector<int64_t> raw = {1000000};
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, raw);
+    decoded.time_unit = DecodedTimeUnit::UNKNOWN;
+    auto outcome = read_column(dt_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*dt_type, *outcome.column, {"1970-01-01 00:00:01.000000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UtcAdjustedDefaultUtc) {
+    // UTC-adjusted input with no timezone assigned in this test is expected to render as UTC.
+    std::vector<int64_t> micros = {0};
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, micros);
+    decoded.timestamp_is_adjusted_to_utc = true;
+    decoded.time_unit = DecodedTimeUnit::MICROS;
+    auto outcome = read_column(dt_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*dt_type, *outcome.column, {"1970-01-01 00:00:00.000000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2UtcAdjustedWithTimezones) {
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> micros = {0, -1, 1234567};
+    cctz::time_zone utc_plus8;
+    cctz::time_zone utc_minus5;
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", utc_plus8));
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("-05:00", utc_minus5));
+    // With the +08:00 zone attached, each instant is expected eight hours ahead of UTC.
+    auto east_view = make_fixed_view(DecodedValueKind::INT64, micros);
+    east_view.timezone = &utc_plus8;
+    east_view.timestamp_is_adjusted_to_utc = true;
+    east_view.time_unit = DecodedTimeUnit::MICROS;
+    auto east_read = read_column(dt_type, east_view);
+    ASSERT_TRUE(east_read.status.ok()) << east_read.status;
+    expect_column_strings(*dt_type, *east_read.column,
+                          {"1970-01-01 08:00:00.000000", "1970-01-01 07:59:59.999999",
+                           "1970-01-01 08:00:01.234567"});
+    // With the -05:00 zone attached, the same instants are expected five hours behind UTC.
+    auto west_view = make_fixed_view(DecodedValueKind::INT64, micros);
+    west_view.timezone = &utc_minus5;
+    west_view.timestamp_is_adjusted_to_utc = true;
+    west_view.time_unit = DecodedTimeUnit::MICROS;
+    auto west_read = read_column(dt_type, west_view);
+    ASSERT_TRUE(west_read.status.ok()) << west_read.status;
+    expect_column_strings(*dt_type, *west_read.column,
+                          {"1969-12-31 19:00:00.000000", "1969-12-31 18:59:59.999999",
+                           "1969-12-31 19:00:01.234567"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDateTimeV2Int96) {
+    // The first two rows are non-null; the third is masked out by the null map.
+    std::vector<TestInt96Timestamp> int96_values = {
+            {0, 2440588},
+            {86399999999000LL, 2440587},
+            {0, 2440589},
+    };
+    std::vector<uint8_t> nulls = {0, 0, 1};
+    cctz::time_zone utc_plus8;
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", utc_plus8));
+    auto dt_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTimeV2>(6));
+    auto decoded = make_fixed_view(DecodedValueKind::INT96, int96_values, &nulls);
+    decoded.timezone = &utc_plus8;
+    auto outcome = read_column(dt_type, decoded);
+    // The expected strings are eight hours ahead of UTC, matching the +08:00 zone on the view.
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*dt_type, *outcome.column,
+                          {"1970-01-01 08:00:00.000000", "1970-01-01 07:59:59.999999", "NULL"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimestampTzInt64AsUtcInstant) {
+    auto tz_type = std::make_shared<DataTypeTimeStampTz>(6);
+    // The instant 2024-12-31 16:00:00 UTC renders as 2025-01-01 00:00:00+08:00.
+    cctz::time_zone utc_plus8;
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", utc_plus8));
+    // Microsecond inputs: the bare instant, then the same instant plus 123456 microseconds.
+    std::vector<int64_t> micros = {1735660800000000LL, 1735660800123456LL};
+    auto micros_view = make_fixed_view(DecodedValueKind::INT64, micros);
+    micros_view.time_unit = DecodedTimeUnit::MICROS;
+    auto micros_read = read_column(tz_type, micros_view);
+    ASSERT_TRUE(micros_read.status.ok()) << micros_read.status;
+    const auto& from_micros = assert_cast<const ColumnTimeStampTz&>(*micros_read.column);
+    EXPECT_EQ(from_micros.get_element(0).to_string(utc_plus8, 6),
+              "2025-01-01 00:00:00.000000+08:00");
+    EXPECT_EQ(from_micros.get_element(1).to_string(utc_plus8, 6),
+              "2025-01-01 00:00:00.123456+08:00");
+    // Millisecond input for the same instant.
+    std::vector<int64_t> millis = {1735660800000LL};
+    auto millis_view = make_fixed_view(DecodedValueKind::INT64, millis);
+    millis_view.time_unit = DecodedTimeUnit::MILLIS;
+    auto millis_read = read_column(tz_type, millis_view);
+    ASSERT_TRUE(millis_read.status.ok()) << millis_read.status;
+    const auto& from_millis = assert_cast<const ColumnTimeStampTz&>(*millis_read.column);
+    EXPECT_EQ(from_millis.get_element(0).to_string(utc_plus8, 6),
+              "2025-01-01 00:00:00.000000+08:00");
+    // Nanosecond input; the expected string keeps six fractional digits.
+    std::vector<int64_t> nanos = {1735660800123456000LL};
+    auto nanos_view = make_fixed_view(DecodedValueKind::INT64, nanos);
+    nanos_view.time_unit = DecodedTimeUnit::NANOS;
+    auto nanos_read = read_column(tz_type, nanos_view);
+    ASSERT_TRUE(nanos_read.status.ok()) << nanos_read.status;
+    const auto& from_nanos = assert_cast<const ColumnTimeStampTz&>(*nanos_read.column);
+    EXPECT_EQ(from_nanos.get_element(0).to_string(utc_plus8, 6),
+              "2025-01-01 00:00:00.123456+08:00");
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, TimestampTzReadsInt96AsUtcInstant) {
+    // Both rows share the second field (2440588) and differ only in the first.
+    std::vector<TestInt96Timestamp> int96_values = {{0, 2440588}, {123456789000LL, 2440588}};
+    cctz::time_zone utc_plus8;
+    ASSERT_TRUE(TimezoneUtils::find_cctz_time_zone("+08:00", utc_plus8));
+    auto tz_type = std::make_shared<DataTypeTimeStampTz>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT96, int96_values);
+    auto outcome = read_column(tz_type, decoded);
+    // The zone is only used to format the results below; it is not assigned to the view.
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& instants = assert_cast<const ColumnTimeStampTz&>(*outcome.column);
+    EXPECT_EQ(instants.get_element(0).to_string(utc_plus8, 6), "1970-01-01 08:00:00.000000+08:00");
+    EXPECT_EQ(instants.get_element(1).to_string(utc_plus8, 6), "1970-01-01 08:02:03.456789+08:00");
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsInvalidKind) {
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    std::vector<int64_t> raw = {0}; // dummy payload shared by every rejected kind
+    for (auto kind :
+         {DecodedValueKind::INT32, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) {
+        auto outcome = read_column(dt_type, make_fixed_view(kind, raw));
+        expect_not_supported(outcome.status);
+        EXPECT_EQ(0, outcome.column->size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsMissingValuesWhenNonNullExists) {
+    // Only the kind and row count are set; no value buffer or null map is attached here.
+    DecodedColumnView decoded;
+    decoded.row_count = 1;
+    decoded.value_kind = DecodedValueKind::INT64;
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    auto outcome = read_column(dt_type, decoded);
+    // The read is expected to be reported as corruption.
+    expect_corruption(outcome.status);
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DateTimeV2RejectsOutOfRangeEpochWithoutAbort) {
+    // Row 0 is the epoch itself; row 1 is the value this test treats as out of range.
+    std::vector<int64_t> micros = {0, -377673580800000001LL};
+    auto dt_type = std::make_shared<DataTypeDateTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, micros);
+    decoded.time_unit = DecodedTimeUnit::MICROS;
+    auto outcome = read_column(dt_type, decoded);
+    // The non-nullable read is expected to fail with a data-quality error and stay empty.
+    expect_data_quality_error(outcome.status);
+    EXPECT_EQ(0, outcome.column->size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableDateTimeV2RejectsOutOfRangeEpochInStrictMode) {
+    // Row 1 carries the out-of-range epoch value; neither row is marked null.
+    std::vector<uint8_t> nulls = {0, 0};
+    std::vector<int64_t> micros = {0, -377673580800000001LL};
+    auto dt_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTimeV2>(6));
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, micros, &nulls);
+    decoded.enable_strict_mode = true;
+    decoded.time_unit = DecodedTimeUnit::MICROS;
+    auto outcome = read_column(dt_type, decoded);
+    // Strict mode is expected to fail the read and leave every part of the column empty.
+    expect_data_quality_error(outcome.status);
+    const auto& outer = assert_cast<const ColumnNullable&>(*outcome.column);
+    EXPECT_EQ(0, outer.get_nested_column().size());
+    EXPECT_EQ(0, outer.get_null_map_data().size());
+    EXPECT_EQ(0, outer.size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableDateTimeV2NullsOutOfRangeEpochInNonStrictMode) {
+    // Only the middle row carries the out-of-range epoch value.
+    std::vector<int64_t> micros = {0, -377673580800000001LL, 1};
+    std::vector<uint8_t> nulls = {0, 0, 0};
+    auto dt_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTimeV2>(6));
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, micros, &nulls);
+    decoded.time_unit = DecodedTimeUnit::MICROS;
+    auto outcome = read_column(dt_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& outer = assert_cast<const ColumnNullable&>(*outcome.column);
+    ASSERT_EQ(3, outer.size());
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    expect_column_strings(*dt_type, *outcome.column,
+                          {"1970-01-01 00:00:00.000000", "NULL", "1970-01-01 00:00:00.000001"});
+}
+
+// ----------------------------------------------------------------------
+// TimeV2 SerDe
+// ----------------------------------------------------------------------
+// TimeV2 decodes INT32 as milliseconds and INT64 according to the supplied time unit. Negative
+// durations are verified because they use a sign bit in TimeValue::TimeType rather than DateTimeV2
+// epoch arithmetic.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt32Millis) {
+    // This test sets no time unit; the expected strings treat the INT32 inputs as millis.
+    std::vector<int32_t> millis = {0, 1, -1, 3661001};
+    auto time_type = std::make_shared<DataTypeTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT32, millis);
+    auto outcome = read_column(time_type, decoded);
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(
+            *time_type, *outcome.column,
+            {"00:00:00.000000", "00:00:00.001000", "-00:00:00.001000", "01:01:01.001000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Micros) {
+    // The inputs include a negative duration of one microsecond.
+    auto time_type = std::make_shared<DataTypeTimeV2>(6);
+    std::vector<int64_t> micros = {0, 1, -1, 3661000001LL};
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, micros);
+    // First pass: the unit is declared explicitly as microseconds.
+    decoded.time_unit = DecodedTimeUnit::MICROS;
+    auto micros_read = read_column(time_type, decoded);
+    ASSERT_TRUE(micros_read.status.ok()) << micros_read.status;
+    expect_column_strings(
+            *time_type, *micros_read.column,
+            {"00:00:00.000000", "00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"});
+    // Second pass: an UNKNOWN unit on the same view is expected to decode identically.
+    decoded.time_unit = DecodedTimeUnit::UNKNOWN;
+    auto unknown_read = read_column(time_type, decoded);
+    ASSERT_TRUE(unknown_read.status.ok()) << unknown_read.status;
+    expect_column_strings(
+            *time_type, *unknown_read.column,
+            {"00:00:00.000000", "00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Millis) {
+    // INT64 inputs declared as milliseconds, including a negative duration.
+    std::vector<int64_t> millis = {1, -1, 3661001};
+    auto time_type = std::make_shared<DataTypeTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, millis);
+    decoded.time_unit = DecodedTimeUnit::MILLIS;
+    auto outcome = read_column(time_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*time_type, *outcome.column,
+                          {"00:00:00.001000", "-00:00:00.001000", "01:01:01.001000"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadTimeV2FromInt64Nanos) {
+    // INT64 inputs declared as nanoseconds; each is a whole number of microseconds.
+    std::vector<int64_t> nanos = {1000, -1000, 3661000001000LL};
+    auto time_type = std::make_shared<DataTypeTimeV2>(6);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, nanos);
+    decoded.time_unit = DecodedTimeUnit::NANOS;
+    auto outcome = read_column(time_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    expect_column_strings(*time_type, *outcome.column,
+                          {"00:00:00.000001", "-00:00:00.000001", "01:01:01.000001"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, TimeV2HandlesNulls) {
+    // Row 1 is flagged null, so it is expected to render as NULL instead of its value.
+    std::vector<uint8_t> nulls = {0, 1, 0};
+    std::vector<int64_t> micros = {0, 1, 2};
+    auto time_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeTimeV2>(6));
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, micros, &nulls);
+    decoded.time_unit = DecodedTimeUnit::MICROS;
+    auto outcome = read_column(time_type, decoded);
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& outer = assert_cast<const ColumnNullable&>(*outcome.column);
+    ASSERT_EQ(3, outer.size());
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    expect_column_strings(*time_type, *outcome.column,
+                          {"00:00:00.000000", "NULL", "00:00:00.000002"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, TimeV2RejectsInvalidKind) {
+    auto time_type = std::make_shared<DataTypeTimeV2>(6);
+    std::vector<int64_t> raw = {0}; // dummy payload shared by every rejected kind
+    for (auto kind : {DecodedValueKind::BOOL, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}) {
+        auto outcome = read_column(time_type, make_fixed_view(kind, raw));
+        expect_not_supported(outcome.status);
+        EXPECT_EQ(0, outcome.column->size());
+    }
+}
+
+// ----------------------------------------------------------------------
+// Decimal SerDe
+// ----------------------------------------------------------------------
+// Decimal cases cover integer-backed values and Parquet big-endian two's-complement binary values.
+// String assertions validate the user-visible scale, while direct column checks lock the native
+// unscaled value for every decimal width.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal32FromInt32) {
+    // The INT32 inputs are unscaled values for a Decimal32 with scale 2.
+    std::vector<int32_t> unscaled = {12345, -67, 0};
+    auto decimal_type = std::make_shared<DataTypeDecimal32>(9, 2);
+    auto decoded = make_fixed_view(DecodedValueKind::INT32, unscaled);
+    auto outcome = read_column(decimal_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& cells = assert_cast<const ColumnDecimal32&>(*outcome.column);
+    EXPECT_EQ(Decimal32(12345), cells.get_element(0));
+    EXPECT_EQ(Decimal32(-67), cells.get_element(1));
+    EXPECT_EQ(Decimal32(0), cells.get_element(2));
+    expect_column_strings(*decimal_type, *outcome.column, {"123.45", "-0.67", "0.00"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal64FromInt64) {
+    // The INT64 inputs are unscaled values for a Decimal64 with scale 4.
+    std::vector<int64_t> unscaled = {123456789, -1};
+    auto decimal_type = std::make_shared<DataTypeDecimal64>(18, 4);
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, unscaled);
+    auto outcome = read_column(decimal_type, decoded);
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& cells = assert_cast<const ColumnDecimal64&>(*outcome.column);
+    EXPECT_EQ(Decimal64(123456789), cells.get_element(0));
+    EXPECT_EQ(Decimal64(-1), cells.get_element(1));
+    expect_column_strings(*decimal_type, *outcome.column, {"12345.6789", "-0.0001"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal128FromInt32AndInt64) {
+    auto decimal_type = std::make_shared<DataTypeDecimal128>(38, 6);
+    { // INT32-backed unscaled values
+        std::vector<int32_t> narrow = {123456, -1};
+        auto outcome = read_column(decimal_type, make_fixed_view(DecodedValueKind::INT32, narrow));
+        ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+        const auto& cells = assert_cast<const ColumnDecimal128V3&>(*outcome.column);
+        EXPECT_EQ(decimal128_v3(123456), cells.get_element(0));
+        EXPECT_EQ(decimal128_v3(-1), cells.get_element(1));
+        expect_column_strings(*decimal_type, *outcome.column, {"0.123456", "-0.000001"});
+    }
+    { // INT64-backed unscaled values
+        std::vector<int64_t> wide = {1234567890123LL, -1234567LL};
+        auto outcome = read_column(decimal_type, make_fixed_view(DecodedValueKind::INT64, wide));
+        ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+        const auto& cells = assert_cast<const ColumnDecimal128V3&>(*outcome.column);
+        EXPECT_EQ(decimal128_v3(1234567890123LL), cells.get_element(0));
+        EXPECT_EQ(decimal128_v3(-1234567LL), cells.get_element(1));
+        expect_column_strings(*decimal_type, *outcome.column, {"1234567.890123", "-1.234567"});
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimal256FromInt64) {
+    // Both Int64 extremes are read into a Decimal256 with scale 8.
+    std::vector<int64_t> raw = {std::numeric_limits<int64_t>::max(),
+                                std::numeric_limits<int64_t>::min()};
+    auto d256 = std::make_shared<DataTypeDecimal256>(76, 8);
+    auto output = read_column(d256, make_fixed_view(DecodedValueKind::INT64, raw));
+    ASSERT_TRUE(output.status.ok()) << output.status;
+    const auto& cells = assert_cast<const ColumnDecimal256&>(*output.column);
+    EXPECT_EQ(decimal256_from_int64(std::numeric_limits<int64_t>::max()), cells.get_element(0));
+    EXPECT_EQ(decimal256_from_int64(std::numeric_limits<int64_t>::min()), cells.get_element(1));
+    expect_column_strings(*d256, *output.column, {"92233720368.54775807", "-92233720368.54775808"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimalFromBinaryBigEndian) {
+    // Variable-length payloads of one and two bytes, read as big-endian two's complement.
+    std::vector<std::string> bytes = {
+            std::string("\x00", 1), std::string("\x7f", 1),     std::string("\x80", 1),
+            std::string("\xff", 1), std::string("\xff\xbd", 2), std::string("\x30\x39", 2),
+    };
+    auto slices = string_refs(bytes);
+    auto decimal_type = std::make_shared<DataTypeDecimal128>(18, 2);
+    auto outcome = read_column(decimal_type, make_binary_view(DecodedValueKind::BINARY, slices));
+
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& cells = assert_cast<const ColumnDecimal128V3&>(*outcome.column);
+    std::vector<Decimal128V3> unscaled = {decimal128_v3(0),    decimal128_v3(127),
+                                          decimal128_v3(-128), decimal128_v3(-1),
+                                          decimal128_v3(-67),  decimal128_v3(12345)};
+    ASSERT_EQ(unscaled.size(), cells.size());
+    for (size_t i = 0; i < unscaled.size(); ++i) {
+        EXPECT_EQ(unscaled[i], cells.get_element(i)) << "row=" << i;
+    }
+    expect_column_strings(*decimal_type, *outcome.column,
+                          {"0.00", "1.27", "-1.28", "-0.01", "-0.67", "123.45"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadDecimalFromFixedBinaryLengths) {
+    // Widths 1, 2, 8 and 16 all decode into the same Decimal128(38, 2) type.
+    auto d128 = std::make_shared<DataTypeDecimal128>(38, 2);
+    {
+        std::vector<std::string> bytes = {std::string("\x00", 1), std::string("\x80", 1)};
+        auto refs = string_refs(bytes);
+        auto output = read_column(d128, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 1));
+        ASSERT_TRUE(output.status.ok()) << output.status;
+        expect_column_strings(*d128, *output.column, {"0.00", "-1.28"});
+    }
+    {
+        std::vector<std::string> bytes = {std::string("\xff\xbd", 2), std::string("\x30\x39", 2)};
+        auto refs = string_refs(bytes);
+        auto output = read_column(d128, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 2));
+        ASSERT_TRUE(output.status.ok()) << output.status;
+        expect_column_strings(*d128, *output.column, {"-0.67", "123.45"});
+    }
+    {
+        std::vector<std::string> bytes = {std::string("\0\0\0\0\0\0\x30\x39", 8)};
+        auto refs = string_refs(bytes);
+        auto output = read_column(d128, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 8));
+        ASSERT_TRUE(output.status.ok()) << output.status;
+        expect_column_strings(*d128, *output.column, {"123.45"});
+    }
+    // Fifteen 0xff bytes followed by 0xbd are expected to decode as -67 (-0.67 at scale 2).
+    {
+        std::vector<std::string> bytes = {
+                std::string("\xff\xff\xff\xff\xff\xff\xff\xff"
+                            "\xff\xff\xff\xff\xff\xff\xff\xbd",
+                            16)};
+        auto refs = string_refs(bytes);
+        auto output = read_column(d128, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 16));
+        ASSERT_TRUE(output.status.ok()) << output.status;
+        expect_column_strings(*d128, *output.column, {"-0.67"});
+    }
+    // The 32-byte width targets Decimal256(76, 2) instead.
+    {
+        auto d256 = std::make_shared<DataTypeDecimal256>(76, 2);
+        std::vector<std::string> bytes = {std::string(31, '\xff') + std::string("\xbd", 1)};
+        auto refs = string_refs(bytes);
+        auto output = read_column(d256, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 32));
+        ASSERT_TRUE(output.status.ok()) << output.status;
+        expect_column_strings(*d256, *output.column, {"-0.67"});
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalHandlesNulls) {
+    // Row 1 is flagged null; its decoded value (-1) is expected to give way to a zero payload.
+    std::vector<int64_t> unscaled = {12345, -1, -67};
+    std::vector<uint8_t> nulls = {0, 1, 0};
+    auto dtype = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal128>(18, 2));
+    auto decoded = make_fixed_view(DecodedValueKind::INT64, unscaled, &nulls);
+    auto outcome = read_column(dtype, decoded);
+    // Null flags and nested payloads are checked row by row.
+    ASSERT_TRUE(outcome.status.ok()) << outcome.status;
+    const auto& outer = assert_cast<const ColumnNullable&>(*outcome.column);
+    const auto& inner = assert_cast<const ColumnDecimal128V3&>(outer.get_nested_column());
+    ASSERT_EQ(3, outer.size());
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_EQ(decimal128_v3(12345), inner.get_element(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_EQ(decimal128_v3(0), inner.get_element(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    EXPECT_EQ(decimal128_v3(-67), inner.get_element(2));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsOutOfRangeValueInStrictMode) {
+    // Strict mode: the ten-digit second value is expected to fail the whole read.
+    std::vector<uint8_t> null_vec = {0, 0};
+    std::vector<int64_t> inputs = {999999999, 1000000000};
+    auto view = make_fixed_view(DecodedValueKind::INT64, inputs, &null_vec);
+    view.enable_strict_mode = true;
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal32>(9, 2));
+    auto output = read_column(type, view);
+
+    expect_data_quality_error(output.status);
+    const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+    EXPECT_EQ(0, nullable_output.get_nested_column().size());
+    EXPECT_EQ(0, nullable_output.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_output.size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalNullsOutOfRangeValueInNonStrictMode) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal32>(9, 2));
+    std::vector<uint8_t> null_vec = {0, 0, 0, 0};
+    std::vector<int64_t> inputs = {999999999, 1000000000, -1000000000, -999999999};
+    auto view = make_fixed_view(DecodedValueKind::INT64, inputs, &null_vec);
+    // enable_strict_mode is not set here; the two ten-digit rows are expected to become NULL.
+    auto output = read_column(type, view);
+
+    ASSERT_TRUE(output.status.ok()) << output.status;
+    const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+    const auto& decimal_nested =
+            assert_cast<const ColumnDecimal32&>(nullable_output.get_nested_column());
+    ASSERT_EQ(4, nullable_output.size());
+    EXPECT_FALSE(nullable_output.is_null_at(0));
+    EXPECT_EQ(Decimal32(999999999), decimal_nested.get_element(0));
+    EXPECT_TRUE(nullable_output.is_null_at(1));
+    EXPECT_EQ(Decimal32(0), decimal_nested.get_element(1));
+    EXPECT_TRUE(nullable_output.is_null_at(2));
+    EXPECT_EQ(Decimal32(0), decimal_nested.get_element(2));
+    EXPECT_FALSE(nullable_output.is_null_at(3));
+    EXPECT_EQ(Decimal32(-999999999), decimal_nested.get_element(3));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsNullBinaryDataWithPositiveLength) {
+    // The only row points at nullptr yet reports a length of 2; the read must fail.
+    std::vector<StringRef> refs = {StringRef(static_cast<const char*>(nullptr), 2)};
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    auto output = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs));
+
+    expect_corruption(output.status);
+    EXPECT_NE(std::string::npos, output.status.to_string().find("row 0"));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalAllowsZeroLengthBinaryAsZero) {
+    // Length-0 rows, one backed by nullptr and one by "", are both expected to read as 0.00.
+    std::vector<StringRef> refs = {StringRef(static_cast<const char*>(nullptr), 0),
+                                   StringRef("", 0)};
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    auto output = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs));
+
+    ASSERT_TRUE(output.status.ok()) << output.status;
+    expect_column_strings(*type, *output.column, {"0.00", "0.00"});
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsInvalidKind) {
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    for (auto kind : {DecodedValueKind::BOOL, DecodedValueKind::FLOAT, DecodedValueKind::DOUBLE,
+                      DecodedValueKind::UINT64}) {
+        std::vector<int64_t> inputs = {0};
+        auto output = read_column(type, make_fixed_view(kind, inputs));
+        EXPECT_EQ(0, output.column->size()); // the destination must stay empty
+        expect_not_supported(output.status);
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, DecimalRejectsMissingBufferWhenNonNullExists) {
+    auto type = std::make_shared<DataTypeDecimal128>(18, 2);
+    {
+        DecodedColumnView view; // default-constructed: only row_count and value_kind are set
+        view.row_count = 1;
+        view.value_kind = DecodedValueKind::INT64;
+        auto output = read_column(type, view);
+        expect_corruption(output.status);
+    }
+    {
+        DecodedColumnView view; // same setup, this time with the BINARY kind
+        view.row_count = 1;
+        view.value_kind = DecodedValueKind::BINARY;
+        auto output = read_column(type, view);
+        expect_corruption(output.status);
+    }
+}
+
+// ----------------------------------------------------------------------
+// Nullable SerDe wrapper
+// ----------------------------------------------------------------------
+// Nullable tests focus on wrapper responsibilities: copying the outer null map, inserting default
+// nested values for null rows, treating a missing null_map as all non-null, appending to existing
+// columns, and rolling back outer state when the nested reader rejects the input.
+
+TEST(DataTypeSerDeDecodedValuesTest, NullablePropagatesNullMapAndReadsNested) {
+    // Rows 1 and 3 are flagged null; their nested slots are expected to read back as 0.
+    std::vector<uint8_t> null_vec = {0, 1, 0, 1};
+    std::vector<int32_t> inputs = {10, 20, 30, 40};
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    auto output = read_column(type, make_fixed_view(DecodedValueKind::INT32, inputs, &null_vec));
+
+    ASSERT_TRUE(output.status.ok()) << output.status;
+    const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+    const auto& nested_output =
+            assert_cast<const ColumnInt32&>(nullable_output.get_nested_column());
+    ASSERT_EQ(4, nullable_output.size());
+    EXPECT_FALSE(nullable_output.is_null_at(0));
+    EXPECT_EQ(10, nested_output.get_element(0));
+    EXPECT_TRUE(nullable_output.is_null_at(1));
+    EXPECT_EQ(0, nested_output.get_element(1));
+    EXPECT_FALSE(nullable_output.is_null_at(2));
+    EXPECT_EQ(30, nested_output.get_element(2));
+    EXPECT_TRUE(nullable_output.is_null_at(3));
+    EXPECT_EQ(0, nested_output.get_element(3));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableWithoutNullMapReadsAllNonNull) {
+    std::vector<std::string> backing = {"alpha", "beta"};
+    auto refs = string_refs(backing);
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    // No null map is handed to make_binary_view here.
+    auto output = read_column(type, make_binary_view(DecodedValueKind::BINARY, refs));
+
+    ASSERT_TRUE(output.status.ok()) << output.status;
+    const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+    ASSERT_EQ(2, nullable_output.size());
+    expect_binary_column(nullable_output.get_nested_column(), backing);
+    EXPECT_FALSE(nullable_output.is_null_at(1));
+    EXPECT_FALSE(nullable_output.is_null_at(0));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableAllNullDoesNotRequireNestedBuffer) {
+    std::vector<uint8_t> null_vec = {1, 1};
+    std::vector<DataTypePtr> dtypes = {
+            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateV2>()),
+            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal128>(18, 2)),
+            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()),
+            std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>()),
+    };
+    // Each view below is given a kind, a row count and the null map, and nothing else.
+    for (const auto& type : dtypes) {
+        DecodedColumnView view;
+        view.row_count = 2;
+        view.null_map = null_vec.data();
+        view.value_kind = type->get_name().find("String") != std::string::npos
+                                  ? DecodedValueKind::BINARY
+                                  : DecodedValueKind::INT32;
+        auto output = read_column(type, view);
+        ASSERT_TRUE(output.status.ok()) << output.status << ", type=" << type->get_name();
+        const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+        ASSERT_EQ(2, nullable_output.size());
+        EXPECT_EQ(2, nullable_output.get_nested_column().size());
+        EXPECT_TRUE(nullable_output.is_null_at(0));
+        EXPECT_TRUE(nullable_output.is_null_at(1));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableAppendToExistingColumn) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    auto target = type->create_column();
+    // First batch: two rows and no null map.
+    std::vector<int32_t> first_inputs = {1, 2};
+    auto first_retval = type->get_serde()->read_column_from_decoded_values(
+            *target, make_fixed_view(DecodedValueKind::INT32, first_inputs));
+    ASSERT_TRUE(first_retval.ok()) << first_retval;
+    // Second batch goes into the same column; its middle row is flagged null.
+    std::vector<uint8_t> second_null_vec = {0, 1, 0};
+    std::vector<int32_t> second_inputs = {10, 20, 30};
+    auto second_retval = type->get_serde()->read_column_from_decoded_values(
+            *target, make_fixed_view(DecodedValueKind::INT32, second_inputs, &second_null_vec));
+    ASSERT_TRUE(second_retval.ok()) << second_retval;
+
+    const auto& nullable_target = assert_cast<const ColumnNullable&>(*target);
+    const auto& nested_target =
+            assert_cast<const ColumnInt32&>(nullable_target.get_nested_column());
+    ASSERT_EQ(5, nullable_target.size());
+    EXPECT_FALSE(nullable_target.is_null_at(0));
+    EXPECT_EQ(1, nested_target.get_element(0));
+    EXPECT_FALSE(nullable_target.is_null_at(1));
+    EXPECT_EQ(2, nested_target.get_element(1));
+    EXPECT_FALSE(nullable_target.is_null_at(2));
+    EXPECT_EQ(10, nested_target.get_element(2));
+    EXPECT_TRUE(nullable_target.is_null_at(3));
+    EXPECT_EQ(0, nested_target.get_element(3));
+    EXPECT_FALSE(nullable_target.is_null_at(4));
+    EXPECT_EQ(30, nested_target.get_element(4));
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullablePropagatesNestedError) {
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    std::vector<uint8_t> null_vec = {0};
+    std::vector<double> inputs = {1.0};
+    auto view = make_fixed_view(DecodedValueKind::DOUBLE, inputs, &null_vec);
+    view.enable_strict_mode = true;
+    auto target = type->create_column();
+    // Goes through the serde directly rather than the read_column helper.
+    auto retval = type->get_serde()->read_column_from_decoded_values(*target, view);
+
+    expect_not_supported(retval);
+    const auto& nullable_target = assert_cast<const ColumnNullable&>(*target);
+    EXPECT_EQ(0, nullable_target.get_nested_column().size());
+    EXPECT_EQ(0, nullable_target.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_target.size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableNonStrictModeNullsUnsupportedDecodedKindForAllTypes) {
+    struct Case {
+        DataTypePtr type;
+        DecodedValueKind kind;
+    };
+    std::vector<Case> table = {
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeBool>()),
+             DecodedValueKind::INT32},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>()),
+             DecodedValueKind::DOUBLE},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeFloat64>()),
+             DecodedValueKind::FLOAT},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()),
+             DecodedValueKind::INT64},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateV2>()),
+             DecodedValueKind::INT64},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTimeV2>(6)),
+             DecodedValueKind::DOUBLE},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeTimeV2>(6)),
+             DecodedValueKind::DOUBLE},
+            {std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal128>(18, 2)),
+             DecodedValueKind::DOUBLE},
+    };
+    // The same two-row buffer is reused for every (type, kind) pairing above.
+    std::vector<int64_t> inputs = {1, 2};
+    for (const auto& case_item : table) {
+        auto view = make_fixed_view(case_item.kind, inputs);
+        // Strict mode is not enabled on the view: expect OK status with every row NULL.
+        auto output = read_column(case_item.type, view);
+
+        ASSERT_TRUE(output.status.ok()) << output.status << ", type=" << case_item.type->get_name();
+        expect_nullable_all_null(*output.column, inputs.size());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableStrictModeRejectsUnsupportedDecodedKind) {
+    // DOUBLE input for a Nullable(Int32) column with strict mode on: the read must fail.
+    std::vector<uint8_t> null_vec = {0};
+    std::vector<double> inputs = {1.0};
+    auto view = make_fixed_view(DecodedValueKind::DOUBLE, inputs, &null_vec);
+    view.enable_strict_mode = true;
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    auto output = read_column(type, view);
+
+    expect_not_supported(output.status);
+    const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+    EXPECT_EQ(0, nullable_output.get_nested_column().size());
+    EXPECT_EQ(0, nullable_output.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_output.size());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableNonStrictModeNullsRowLevelDecodedConversionFailure) {
+    {
+        std::vector<StringRef> refs = {StringRef("ok", 2),
+                                       StringRef(static_cast<const char*>(nullptr), 2),
+                                       StringRef("", 0)};
+        auto view = make_binary_view(DecodedValueKind::BINARY, refs);
+        auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+        // Only the middle row (nullptr with length 2) is expected to turn into NULL.
+        auto output = read_column(type, view);
+
+        ASSERT_TRUE(output.status.ok()) << output.status;
+        const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+        ASSERT_EQ(3, nullable_output.size());
+        EXPECT_FALSE(nullable_output.is_null_at(0));
+        EXPECT_TRUE(nullable_output.is_null_at(1));
+        EXPECT_FALSE(nullable_output.is_null_at(2));
+        expect_binary_column(nullable_output.get_nested_column(), {"ok", "", ""});
+    }
+    {
+        auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDecimal128>(18, 2));
+        std::vector<StringRef> refs = {StringRef("\x30\x39", 2),
+                                       StringRef(static_cast<const char*>(nullptr), 2)};
+        auto view = make_binary_view(DecodedValueKind::BINARY, refs);
+        // Same idea for decimals: the nullptr-backed second row should print as NULL.
+        auto output = read_column(type, view);
+
+        ASSERT_TRUE(output.status.ok()) << output.status;
+        const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+        ASSERT_EQ(2, nullable_output.size());
+        EXPECT_FALSE(nullable_output.is_null_at(0));
+        EXPECT_TRUE(nullable_output.is_null_at(1));
+        expect_column_strings(*type, *output.column, {"123.45", "NULL"});
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, NullableStrictModeRejectsRowLevelDecodedConversionFailure) {
+    // Row 1 pairs a null pointer with length 2; with strict mode on the read must fail.
+    std::vector<StringRef> refs = {StringRef("ok", 2),
+                                   StringRef(static_cast<const char*>(nullptr), 2)};
+    auto view = make_binary_view(DecodedValueKind::BINARY, refs);
+    view.enable_strict_mode = true;
+    auto type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+    auto output = read_column(type, view);
+
+    expect_corruption(output.status);
+    const auto& nullable_output = assert_cast<const ColumnNullable&>(*output.column);
+    EXPECT_EQ(0, nullable_output.get_nested_column().size());
+    EXPECT_EQ(0, nullable_output.get_null_map_data().size());
+    EXPECT_EQ(0, nullable_output.size());
+}
+
+// ----------------------------------------------------------------------
+// read_field_from_decoded_value
+// ----------------------------------------------------------------------
+// The field path is used by Parquet min/max and pruning code. It must be covered independently
+// because it creates a one-row column, delegates to the batch reader, and extracts a Field value.
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldPrimitiveValues) {
+    { // bool view -> TYPE_BOOLEAN
+        std::vector<uint8_t> inputs = {true};
+        auto datum = read_field(std::make_shared<DataTypeBool>(), make_bool_view(inputs));
+        EXPECT_EQ(TYPE_BOOLEAN, datum.get_type());
+        EXPECT_TRUE(datum.get<TYPE_BOOLEAN>());
+    }
+    { // INT32 -> TYPE_INT
+        std::vector<int32_t> inputs = {-42};
+        auto datum = read_field(std::make_shared<DataTypeInt32>(),
+                                make_fixed_view(DecodedValueKind::INT32, inputs));
+        EXPECT_EQ(TYPE_INT, datum.get_type());
+        EXPECT_EQ(-42, datum.get<TYPE_INT>());
+    }
+    { // INT64 -> TYPE_BIGINT
+        std::vector<int64_t> inputs = {1234567890123LL};
+        auto datum = read_field(std::make_shared<DataTypeInt64>(),
+                                make_fixed_view(DecodedValueKind::INT64, inputs));
+        EXPECT_EQ(TYPE_BIGINT, datum.get_type());
+        EXPECT_EQ(1234567890123LL, datum.get<TYPE_BIGINT>());
+    }
+    { // INT64 -> TYPE_LARGEINT
+        std::vector<int64_t> inputs = {-9};
+        auto datum = read_field(std::make_shared<DataTypeInt128>(),
+                                make_fixed_view(DecodedValueKind::INT64, inputs));
+        EXPECT_EQ(TYPE_LARGEINT, datum.get_type());
+        EXPECT_EQ(static_cast<__int128_t>(-9), datum.get<TYPE_LARGEINT>());
+    }
+    { // FLOAT NaN stays NaN
+        std::vector<float> inputs = {std::numeric_limits<float>::quiet_NaN()};
+        auto datum = read_field(std::make_shared<DataTypeFloat32>(),
+                                make_fixed_view(DecodedValueKind::FLOAT, inputs));
+        EXPECT_EQ(TYPE_FLOAT, datum.get_type());
+        EXPECT_TRUE(std::isnan(datum.get<TYPE_FLOAT>()));
+    }
+    { // DOUBLE infinity stays infinite
+        std::vector<double> inputs = {std::numeric_limits<double>::infinity()};
+        auto datum = read_field(std::make_shared<DataTypeFloat64>(),
+                                make_fixed_view(DecodedValueKind::DOUBLE, inputs));
+        EXPECT_EQ(TYPE_DOUBLE, datum.get_type());
+        EXPECT_TRUE(std::isinf(datum.get<TYPE_DOUBLE>()));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldLogicalIntegerCastsPhysicalValue) {
+    { // INT32 32767 annotated with (8, false) is expected to read back as 255
+        std::vector<int32_t> inputs = {32767};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::INT32, inputs), 8, false);
+        auto datum = read_field(std::make_shared<DataTypeInt16>(), view);
+        EXPECT_EQ(TYPE_SMALLINT, datum.get_type());
+        EXPECT_EQ(255, datum.get<TYPE_SMALLINT>());
+    }
+    { // UINT32 bits of -1 annotated with (32, false) are expected to read as 4294967295
+        std::vector<int32_t> inputs = {-1};
+        auto view =
+                with_logical_integer(make_fixed_view(DecodedValueKind::UINT32, inputs), 32, false);
+        auto datum = read_field(std::make_shared<DataTypeInt64>(), view);
+        EXPECT_EQ(TYPE_BIGINT, datum.get_type());
+        EXPECT_EQ(4294967295LL, datum.get<TYPE_BIGINT>());
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldStringValues) {
+    auto type = std::make_shared<DataTypeString>();
+    std::vector<std::string> backing = {std::string("a\0b", 3)};
+    auto refs = string_refs(backing);
+    auto datum = read_field(type, make_binary_view(DecodedValueKind::BINARY, refs));
+    EXPECT_EQ(TYPE_STRING, datum.get_type());
+    EXPECT_EQ(std::string("a\0b", 3), datum.get<TYPE_STRING>());
+    // Same check for FIXED_BINARY, passing 4 as the extra make_binary_view argument.
+    std::vector<std::string> fixed_backing = {std::string("\x00\x01\x02\x03", 4)};
+    auto fixed_refs = string_refs(fixed_backing);
+    auto fixed_datum =
+            read_field(type, make_binary_view(DecodedValueKind::FIXED_BINARY, fixed_refs, 4));
+    EXPECT_EQ(TYPE_STRING, fixed_datum.get_type());
+    EXPECT_EQ(std::string("\x00\x01\x02\x03", 4), fixed_datum.get<TYPE_STRING>());
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldDateTimeAndTimeValues) {
+    { // DateV2 from INT32: 18628 is expected to print as 2021-01-01
+        auto type = std::make_shared<DataTypeDateV2>();
+        std::vector<int32_t> inputs = {18628};
+        auto datum = read_field(type, make_fixed_view(DecodedValueKind::INT32, inputs));
+        EXPECT_EQ(TYPE_DATEV2, datum.get_type());
+        EXPECT_EQ("2021-01-01", datum.to_debug_string(0));
+    }
+    { // DateTimeV2 from INT64 with the MICROS unit
+        auto type = std::make_shared<DataTypeDateTimeV2>(6);
+        std::vector<int64_t> inputs = {1234567};
+        auto view = make_fixed_view(DecodedValueKind::INT64, inputs);
+        view.time_unit = DecodedTimeUnit::MICROS;
+        auto datum = read_field(type, view);
+        EXPECT_EQ(TYPE_DATETIMEV2, datum.get_type());
+        EXPECT_EQ("1970-01-01 00:00:01.234567", datum.to_debug_string(6));
+    }
+    { // DateTimeV2 from INT64 with the MILLIS unit
+        auto type = std::make_shared<DataTypeDateTimeV2>(6);
+        std::vector<int64_t> inputs = {1234};
+        auto view = make_fixed_view(DecodedValueKind::INT64, inputs);
+        view.time_unit = DecodedTimeUnit::MILLIS;
+        auto datum = read_field(type, view);
+        EXPECT_EQ(TYPE_DATETIMEV2, datum.get_type());
+        EXPECT_EQ("1970-01-01 00:00:01.234000", datum.to_debug_string(6));
+    }
+    { // DateTimeV2 from INT96: {0, 2440588} is expected to print as the 1970 epoch
+        auto type = std::make_shared<DataTypeDateTimeV2>(6);
+        std::vector<TestInt96Timestamp> inputs = {{0, 2440588}};
+        auto datum = read_field(type, make_fixed_view(DecodedValueKind::INT96, inputs));
+        EXPECT_EQ(TYPE_DATETIMEV2, datum.get_type());
+        EXPECT_EQ("1970-01-01 00:00:00.000000", datum.to_debug_string(6));
+    }
+    { // TimeV2 from INT64 micros; checked by inserting the field into a fresh column
+        auto type = std::make_shared<DataTypeTimeV2>(6);
+        std::vector<int64_t> inputs = {3661000001LL};
+        auto view = make_fixed_view(DecodedValueKind::INT64, inputs);
+        view.time_unit = DecodedTimeUnit::MICROS;
+        auto datum = read_field(type, view);
+        EXPECT_EQ(TYPE_TIMEV2, datum.get_type());
+        auto target = type->create_column();
+        target->insert(datum);
+        expect_column_strings(*type, *target, {"01:01:01.000001"});
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldDecimalValues) {
+    { // Decimal32 from INT32
+        auto type = std::make_shared<DataTypeDecimal32>(9, 2);
+        std::vector<int32_t> inputs = {12345};
+        auto datum = read_field(type, make_fixed_view(DecodedValueKind::INT32, inputs));
+        EXPECT_EQ(TYPE_DECIMAL32, datum.get_type());
+        EXPECT_EQ("123.45", datum.to_debug_string(2));
+    }
+    { // Decimal64 from INT64
+        auto type = std::make_shared<DataTypeDecimal64>(18, 4);
+        std::vector<int64_t> inputs = {-1};
+        auto datum = read_field(type, make_fixed_view(DecodedValueKind::INT64, inputs));
+        EXPECT_EQ(TYPE_DECIMAL64, datum.get_type());
+        EXPECT_EQ("-0.0001", datum.to_debug_string(4));
+    }
+    { // Decimal128 from a two-byte BINARY value
+        auto type = std::make_shared<DataTypeDecimal128>(38, 2);
+        std::vector<std::string> backing = {std::string("\x30\x39", 2)};
+        auto refs = string_refs(backing);
+        auto datum = read_field(type, make_binary_view(DecodedValueKind::BINARY, refs));
+        EXPECT_EQ(TYPE_DECIMAL128I, datum.get_type());
+        EXPECT_EQ("123.45", datum.to_debug_string(2));
+    }
+    { // Decimal256 from a 32-byte FIXED_BINARY value
+        auto type = std::make_shared<DataTypeDecimal256>(76, 2);
+        std::vector<std::string> backing = {std::string(31, '\xff') + std::string("\xbd", 1)};
+        auto refs = string_refs(backing);
+        auto datum = read_field(type, make_binary_view(DecodedValueKind::FIXED_BINARY, refs, 32));
+        EXPECT_EQ(TYPE_DECIMAL256, datum.get_type());
+        EXPECT_EQ("-0.67", datum.to_debug_string(2));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesTest, ReadFieldPropagatesUnsupportedKind) {
+    { // String target fed an INT32 view
+        std::vector<int32_t> inputs = {1};
+        auto type = std::make_shared<DataTypeString>();
+        expect_not_supported(
+                read_field_status(type, make_fixed_view(DecodedValueKind::INT32, inputs)));
+    }
+    { // Int32 target fed a DOUBLE view
+        std::vector<double> inputs = {1.0};
+        auto type = std::make_shared<DataTypeInt32>();
+        expect_not_supported(
+                read_field_status(type, make_fixed_view(DecodedValueKind::DOUBLE, inputs)));
+    }
+    { // DateV2 target fed an INT64 view
+        std::vector<int64_t> inputs = {0};
+        auto type = std::make_shared<DataTypeDateV2>();
+        expect_not_supported(
+                read_field_status(type, make_fixed_view(DecodedValueKind::INT64, inputs)));
+    }
+}
+
+TEST(DataTypeSerDeDecodedValuesDeathTest, ReadFieldRejectsInvalidRowCountDeathTest) {
+    auto type = std::make_shared<DataTypeInt32>();
+    std::vector<int32_t> inputs = {1, 2};
+    Field datum;
+    // A row_count of 0 is expected to abort with the "view.row_count == 1" message ...
+    auto zero_row_view = make_fixed_view(DecodedValueKind::INT32, inputs);
+    zero_row_view.row_count = 0;
+    EXPECT_DEATH(
+            {
+                auto retval = type->get_serde()->read_field_from_decoded_value(*type, &datum,
+                                                                               zero_row_view);
+                (void)retval;
+            },
+            "view.row_count == 1");
+    // ... and so is a row_count of 2.
+    auto two_row_view = make_fixed_view(DecodedValueKind::INT32, inputs);
+    two_row_view.row_count = 2;
+    EXPECT_DEATH(
+            {
+                auto retval = type->get_serde()->read_field_from_decoded_value(*type, &datum,
+                                                                               two_row_view);
+                (void)retval;
+            },
+            "view.row_count == 1");
+}
+
+TEST(DataTypeSerDeDecodedValuesDeathTest, ReadFieldRejectsNullFieldPointerDeathTest) {
+    std::vector<int32_t> inputs = {1};
+    auto view = make_fixed_view(DecodedValueKind::INT32, inputs);
+    auto type = std::make_shared<DataTypeInt32>();
+    // Passing nullptr as the output field is expected to abort with "field != nullptr".
+    EXPECT_DEATH(
+            {
+                auto retval =
+                        type->get_serde()->read_field_from_decoded_value(*type, nullptr, view);
+                (void)retval;
+            },
+            "field != nullptr");
+}
+
+// ----------------------------------------------------------------------
+// Illegal kind matrix
+// ----------------------------------------------------------------------
+// This compact matrix complements the focused error tests above by ensuring each decoded-aware
+// family rejects representative illegal physical kinds without mutating an empty destination.
+
+TEST(DataTypeSerDeDecodedValuesTest, IllegalKindMatrixRejectsUnsupportedCombinations) {
+    struct Case {
+        DataTypePtr type;
+        std::vector<DecodedValueKind> illegal_kinds;
+    };
+    std::vector<Case> table = {
+            {std::make_shared<DataTypeBool>(), {DecodedValueKind::INT32, DecodedValueKind::BINARY}},
+            {std::make_shared<DataTypeInt32>(),
+             {DecodedValueKind::BOOL, DecodedValueKind::FLOAT, DecodedValueKind::DOUBLE,
+              DecodedValueKind::BINARY}},
+            {std::make_shared<DataTypeFloat32>(),
+             {DecodedValueKind::DOUBLE, DecodedValueKind::INT32}},
+            {std::make_shared<DataTypeFloat64>(),
+             {DecodedValueKind::FLOAT, DecodedValueKind::INT64}},
+            {std::make_shared<DataTypeString>(),
+             {DecodedValueKind::INT32, DecodedValueKind::DOUBLE}},
+            {std::make_shared<DataTypeDateV2>(),
+             {DecodedValueKind::INT64, DecodedValueKind::BINARY}},
+            {std::make_shared<DataTypeDateTimeV2>(6),
+             {DecodedValueKind::INT32, DecodedValueKind::DOUBLE, DecodedValueKind::BINARY}},
+            {std::make_shared<DataTypeTimeV2>(6),
+             {DecodedValueKind::BOOL, DecodedValueKind::BINARY, DecodedValueKind::DOUBLE}},
+            {std::make_shared<DataTypeDecimal128>(18, 2),
+             {DecodedValueKind::BOOL, DecodedValueKind::UINT64, DecodedValueKind::FLOAT,
+              DecodedValueKind::DOUBLE}},
+    };
+    // Every (type, kind) pair must come back NotSupported and leave the column empty.
+    for (const auto& case_item : table) {
+        for (auto kind : case_item.illegal_kinds) {
+            std::vector<int64_t> inputs = {0};
+            auto output = read_column(case_item.type, make_fixed_view(kind, inputs));
+            expect_not_supported(output.status);
+            EXPECT_EQ(0, output.column->size()) << case_item.type->get_name();
+        }
+    }
+}
+
+} // namespace doris
diff --git a/be/test/core/data_type_serde/data_type_serde_pb_test.cpp b/be/test/core/data_type_serde/data_type_serde_pb_test.cpp
index 986583982eb2bd..c1663bf7a9dd49 100644
--- a/be/test/core/data_type_serde/data_type_serde_pb_test.cpp
+++ b/be/test/core/data_type_serde/data_type_serde_pb_test.cpp
@@ -54,6 +54,7 @@
 #include "core/data_type/data_type_quantilestate.h"
 #include "core/data_type/data_type_string.h"
 #include "core/data_type/data_type_struct.h"
+#include "core/data_type/data_type_timestamptz.h"
 #include "core/data_type_serde/data_type_serde.h"
 #include "core/types.h"
 #include "core/value/bitmap_value.h"
@@ -646,6 +647,17 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestDateTime) {
     }
 }
 
+TEST(DataTypeSerDePbTest, DataTypeTimeStampTzToProtobufKeepsScale) {
+    PTypeDesc pb_desc;
+    DataTypePtr tz_type(std::make_shared<DataTypeTimeStampTz>(6));
+    tz_type->to_protobuf(&pb_desc);
+    // Expect exactly one type node whose scalar entry keeps the scale of 6 passed above.
+    ASSERT_EQ(pb_desc.types_size(), 1);
+    const auto& scalar = pb_desc.types(0).scalar_type();
+    EXPECT_EQ(scalar.scale(), 6);
+    EXPECT_EQ(scalar.type(), TPrimitiveType::TIMESTAMPTZ);
+}
+
 TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestLargeInt) {
     std::cout << "==== LargeInt === " << std::endl;
     // LargeInt
@@ -662,4 +674,4 @@ TEST(DataTypeSerDePbTest, DataTypeScalaSerDeTestLargeInt) {
         check_pb_col(data_type, *vec.get());
     }
 }
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp b/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp
index 403ef8713e4e67..b3e512734c6e73 100644
--- a/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp
+++ b/be/test/exec/runtime_filter/runtime_filter_expr_sampling_test.cpp
@@ -18,10 +18,13 @@
 #include <glog/logging.h>
 #include <gtest/gtest.h>
 
+#include "core/data_type/data_type_number.h"
 #include "exec/runtime_filter/runtime_filter_selectivity.h"
 #include "exec/runtime_filter/runtime_filter_test_utils.h"
 #include "exprs/runtime_filter_expr.h"
+#include "exprs/vdirect_in_predicate.h"
 #include "exprs/vexpr_context.h"
+#include "exprs/vslot_ref.h"
 
 namespace doris {
 
@@ -178,4 +181,47 @@ TEST_F(RuntimeFilterExprSamplingTest, sampling_frequency_survives_context_recrea
     EXPECT_TRUE(selectivity.maybe_always_true_can_ignore());
 }
 
+// The children reported by RuntimeFilterExpr come from _impl->children(); the wrapper's own
+// _children vector does not hold them, so deep_clone has to clone the _impl tree explicitly.
+TEST_F(RuntimeFilterExprSamplingTest, deep_clone_clones_impl_tree) {
+    auto pred_type = TTypeDescBuilder()
+                             .set_types(TTypeNodeBuilder()
+                                                .set_type(TTypeNodeType::SCALAR)
+                                                .set_scalar_type(TPrimitiveType::BOOLEAN)
+                                                .build())
+                             .build();
+    TExprNode pred = TExprNodeBuilder(TExprNodeType::IN_PRED, pred_type, 0).build();
+    pred.__set_is_nullable(false);
+    pred.__set_opcode(TExprOpcode::FILTER_IN);
+    pred.in_predicate.__set_is_not_in(false);
+    // Build wrapper -> impl -> slot so the clone has one child to copy.
+    auto slot = VSlotRef::create_shared(/*slot_id=*/0, /*column_id=*/0, /*column_uniq_id=*/10,
+                                        std::make_shared<DataTypeInt32>(), "c0");
+    auto impl = VDirectInPredicate::create_shared(pred, nullptr);
+    impl->add_child(slot);
+
+    auto rf_expr = RuntimeFilterExpr::create_shared(pred, impl, 0.4, false, /*filter_id=*/7,
+                                                    /*sampling_frequency=*/32);
+
+    VExprSPtr cloned_root;
+    ASSERT_TRUE(rf_expr->deep_clone(&cloned_root).ok());
+    // The clone must be a distinct RuntimeFilterExpr that keeps the filter id.
+    auto* cloned_rf_expr = dynamic_cast<RuntimeFilterExpr*>(cloned_root.get());
+    ASSERT_NE(cloned_rf_expr, nullptr);
+    EXPECT_NE(cloned_rf_expr, rf_expr.get());
+    EXPECT_EQ(cloned_rf_expr->filter_id(), 7);
+    // Its impl and the impl's slot child must be fresh objects as well.
+    auto cloned_impl = cloned_rf_expr->get_impl();
+    ASSERT_NE(cloned_impl, nullptr);
+    EXPECT_NE(cloned_impl.get(), impl.get());
+    ASSERT_EQ(cloned_impl->get_num_children(), 1);
+    EXPECT_NE(cloned_impl->children()[0].get(), slot.get());
+
+    auto* cloned_slot = dynamic_cast<VSlotRef*>(cloned_impl->children()[0].get());
+    ASSERT_NE(cloned_slot, nullptr);
+    EXPECT_EQ(cloned_slot->column_name(), "c0");
+    EXPECT_EQ(cloned_slot->column_uniq_id(), 10);
+    EXPECT_EQ(cloned_slot->column_id(), 0);
+}
+
 } // namespace doris
diff --git a/be/test/exec/scan/access_path_parser_test.cpp b/be/test/exec/scan/access_path_parser_test.cpp
new file mode 100644
index 00000000000000..d4bd6ab6c06360
--- /dev/null
+++ b/be/test/exec/scan/access_path_parser_test.cpp
@@ -0,0 +1,371 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/scan/access_path_parser.h"
+
+#include <gen_cpp/Descriptors_types.h>
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/field.h"
+
+namespace doris {
+namespace {
+
+TColumnAccessPath data_access_path(std::vector<std::string> path) {
+    TDataAccessPath payload; // DATA payload that takes ownership of the path tokens
+    payload.__set_path(std::move(path));
+    TColumnAccessPath result;
+    result.__set_type(TAccessPathType::DATA);
+    result.__set_data_access_path(std::move(payload));
+    return result;
+}
+
+TColumnAccessPath data_access_path_without_payload() {
+    TColumnAccessPath payload_free_path; // type is DATA, but data_access_path stays unset
+    payload_free_path.__set_type(TAccessPathType::DATA);
+    return payload_free_path;
+}
+
+TColumnAccessPath meta_access_path() {
+    TColumnAccessPath meta_path; // only the type is set; no payload is attached
+    meta_path.__set_type(TAccessPathType::META);
+    return meta_path;
+}
+
+format::ColumnDefinition field(int32_t id, std::string name, DataTypePtr type,
+                               std::vector<format::ColumnDefinition> children = {},
+                               std::vector<std::string> aliases = {}) {
+    return {
+            .identifier = Field::create_field<TYPE_INT>(id), // integer field id
+            .name = std::move(name),
+            .name_mapping = std::move(aliases), // extra names this field can be looked up by
+            .type = std::move(type),
+            .children = std::move(children),
+    };
+}
+
+format::ColumnDefinition root_column(int32_t id, std::string name, DataTypePtr type) {
+    return {
+            .identifier = Field::create_field<TYPE_INT>(id), // integer field id
+            .name = std::move(name),
+            .type = std::move(type), // children and name_mapping are omitted here
+    };
+}
+
+void expect_child(const format::ColumnDefinition& child, int32_t id, const std::string& name) {
+    ASSERT_TRUE(child.has_identifier_field_id()); // fatal failure only leaves this helper
+    EXPECT_EQ(child.get_identifier_field_id(), id);
+    EXPECT_EQ(child.name, name);
+}
+
+const format::ColumnDefinition* find_child_by_name(const format::ColumnDefinition& parent,
+                                                   const std::string& name) {
+    const format::ColumnDefinition* match = nullptr;
+    for (size_t i = 0; i < parent.children.size() && match == nullptr; ++i) {
+        const auto& candidate = parent.children[i];
+        match = candidate.name == name ? &candidate : nullptr;
+    }
+    return match; // first child with exactly this name, or nullptr when there is none
+}
+
+} // namespace
+
+// Scenario: primitive columns and scanner-materialized virtual columns must not get nested
+// children, even when their descriptor carries access paths that are not meaningful to the parser.
+TEST(AccessPathParserTest, IgnoresPrimitiveColumnsAndScannerVirtualColumns) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+
+    // Primitive columns have no nested children, so the parser must accept even a META path.
+    auto primitive = root_column(1, "id", int_type);
+    auto status = AccessPathParser::build_nested_children(
+            &primitive, std::vector<TColumnAccessPath> {meta_access_path()}, nullptr);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_TRUE(primitive.children.empty()); // nothing was built under the primitive column
+
+    // The Iceberg rowid column is materialized by scanner/table-reader logic and may carry a
+    // negative access-path token ("-1" here). The parser must leave the column untouched.
+    auto rowid_type = std::make_shared<DataTypeStruct>(
+            DataTypes {string_type, std::make_shared<DataTypeInt64>(),
+                       std::make_shared<DataTypeInt32>(), string_type},
+            Strings {"file_path", "row_pos", "partition_spec_id", "partition_data_json"});
+    format::ColumnDefinition rowid {
+            .identifier = Field::create_field<TYPE_STRING>(BeConsts::ICEBERG_ROWID_COL),
+            .name = BeConsts::ICEBERG_ROWID_COL,
+            .type = rowid_type,
+    };
+    status = AccessPathParser::build_nested_children(
+            &rowid, std::vector<TColumnAccessPath> {data_access_path({"-1"})}, nullptr);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_TRUE(rowid.children.empty()); // struct-typed, yet no children are expanded
+}
+
+// Scenario: reject unsupported top-level inputs before recursive type parsing, including META
+// paths, missing DATA payloads, and access paths whose root does not match the projected slot.
+TEST(AccessPathParserTest, RejectsUnsupportedTopLevelAccessPathInputs) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"a"});
+
+    struct Case {
+        std::string name; // label streamed into the failure message
+        format::ColumnDefinition column;
+        std::vector<TColumnAccessPath> paths;
+    };
+    std::vector<Case> cases; // every case below is expected to be rejected
+    cases.push_back({"meta path", root_column(100, "s", struct_type), {meta_access_path()}});
+    cases.push_back({"missing DATA payload",
+                     root_column(100, "s", struct_type),
+                     {data_access_path_without_payload()}});
+    cases.push_back({"wrong root name",
+                     root_column(100, "s", struct_type),
+                     {data_access_path({"other", "a"})}});
+    cases.push_back({"wrong root field id",
+                     root_column(100, "s", struct_type),
+                     {data_access_path({"101", "a"})}});
+
+    for (auto& test_case : cases) {
+        auto status = AccessPathParser::build_nested_children(&test_case.column, test_case.paths,
+                                                              nullptr);
+        EXPECT_FALSE(status.ok()) << test_case.name;
+    }
+}
+
+// Scenario: struct access paths support field-id lookup, alias lookup, case-insensitive name
+// fallback, and whole-struct expansion; reserved array/map path tokens remain invalid.
+TEST(AccessPathParserTest, StructAccessPathMatrix) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, int_type}, Strings {"a", "b"});
+    format::ColumnDefinition schema {
+            .identifier = Field::create_field<TYPE_INT>(100),
+            .name = "s",
+            .type = struct_type,
+            .children =
+                    {
+                            field(101, "a", int_type),
+                            field(205, "b", int_type, {}, {"old_b"}),
+                    },
+    };
+
+    { // case-insensitive name lookup ("A" resolves to "a") with no schema supplied
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"s", "A"})}, nullptr);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 0, "a"); // id 0 with no schema - TODO confirm source
+    }
+    { // lookup by field ids: root "100", child "205"
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"100", "205"})},
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 205, "b");
+    }
+    { // lookup through the alias "old_b" declared in the schema's name_mapping
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"s", "old_b"})},
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 205, "b");
+        EXPECT_EQ(column.children[0].name_mapping, std::vector<std::string>({"old_b"}));
+    }
+    { // root-only path expands both struct children
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"s"})}, &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        expect_child(column.children[0], 101, "a");
+        expect_child(column.children[1], 205, "b");
+    }
+
+    for (const auto& invalid_child : {"OFFSET", "*", "KEYS", "VALUES", "missing"}) {
+        auto column = root_column(100, "s", struct_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"s", invalid_child})},
+                &schema);
+        EXPECT_FALSE(status.ok()) << invalid_child; // each token must be rejected under a struct
+    }
+}
+
+// Scenario: array access paths must pass through the "*" element token, then reuse struct child
+// parsing under the element wrapper; invalid array tokens are rejected.
+TEST(AccessPathParserTest, ArrayAccessPathMatrix) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    auto element_type = std::make_shared<DataTypeStruct>(DataTypes {string_type, int_type},
+                                                         Strings {"item", "quantity"});
+    auto array_type = std::make_shared<DataTypeArray>(element_type);
+    format::ColumnDefinition schema {
+            .identifier = Field::create_field<TYPE_INT>(200),
+            .name = "items",
+            .type = array_type,
+            .children =
+                    {
+                            field(201, "element", element_type,
+                                  {
+                                          field(202, "item", string_type, {}, {"old_item"}),
+                                          field(203, "quantity", int_type),
+                                  }),
+                    },
+    };
+
+    { // alias "old_item" is resolved below the "*" element token
+        auto column = root_column(200, "items", array_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column,
+                std::vector<TColumnAccessPath> {data_access_path({"items", "*", "old_item"})},
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 201, "element");
+        ASSERT_EQ(column.children[0].children.size(), 1);
+        expect_child(column.children[0].children[0], 202, "item");
+        EXPECT_EQ(column.children[0].children[0].name_mapping,
+                  std::vector<std::string>({"old_item"}));
+    }
+    { // root-only path expands the element wrapper and both of its fields
+        auto column = root_column(200, "items", array_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"items"})}, &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 1);
+        expect_child(column.children[0], 201, "element");
+        ASSERT_EQ(column.children[0].children.size(), 2);
+        expect_child(column.children[0].children[0], 202, "item");
+        expect_child(column.children[0].children[1], 203, "quantity");
+    }
+
+    for (const auto& invalid_path : std::vector<std::vector<std::string>> {
+                 {"items", "OFFSET"}, {"items", "item"}, {"items", "*", "missing"}}) {
+        auto column = root_column(200, "items", array_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path(invalid_path)}, &schema);
+        EXPECT_FALSE(status.ok()) << invalid_path.back(); // last token labels the failing path
+    }
+}
+
+// Scenario: map access paths split KEYS/VALUES, force the missing side needed for materialization,
+// merge repeated value-child requests, and reject unsupported map child tokens.
+TEST(AccessPathParserTest, MapAccessPathMatrix) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    auto value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {string_type, int_type, string_type}, Strings {"full_name", "age", "gender"});
+    auto map_type = std::make_shared<DataTypeMap>(string_type, value_type);
+    format::ColumnDefinition schema {
+            .identifier = Field::create_field<TYPE_INT>(300),
+            .name = "m",
+            .type = map_type,
+            .children =
+                    {
+                            field(301, "key", string_type),
+                            field(302, "value", value_type,
+                                  {
+                                          field(303, "full_name", string_type, {}, {"name"}),
+                                          field(304, "age", int_type),
+                                          field(305, "gender", string_type),
+                                  }),
+                    },
+    };
+
+    { // KEYS only: the value side is still present, with all three of its fields
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"m", "KEYS"})}, &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        expect_child(column.children[0], 301, "key");
+        expect_child(column.children[1], 302, "value");
+        ASSERT_EQ(column.children[1].children.size(), 3);
+        const auto* full_name = find_child_by_name(column.children[1], "full_name");
+        ASSERT_NE(full_name, nullptr);
+        expect_child(*full_name, 303, "full_name");
+        const auto* age = find_child_by_name(column.children[1], "age");
+        ASSERT_NE(age, nullptr);
+        expect_child(*age, 304, "age");
+        const auto* gender = find_child_by_name(column.children[1], "gender");
+        ASSERT_NE(gender, nullptr);
+        expect_child(*gender, 305, "gender");
+    }
+    { // VALUES.age: the key side is still present; the value keeps only "age"
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"m", "VALUES", "age"})},
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        expect_child(column.children[0], 301, "key");
+        expect_child(column.children[1], 302, "value");
+        ASSERT_EQ(column.children[1].children.size(), 1);
+        expect_child(column.children[1].children[0], 304, "age");
+    }
+    { // two paths (alias "name" via VALUES, "gender" via "*") merge under the value child
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column,
+                std::vector<TColumnAccessPath> {
+                        data_access_path({"m", "VALUES", "name"}),
+                        data_access_path({"m", "*", "gender"}),
+                },
+                &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        ASSERT_EQ(column.children[1].children.size(), 2);
+        const auto* full_name = find_child_by_name(column.children[1], "full_name");
+        ASSERT_NE(full_name, nullptr);
+        expect_child(*full_name, 303, "full_name");
+        EXPECT_EQ(full_name->name_mapping, std::vector<std::string>({"name"}));
+        const auto* gender = find_child_by_name(column.children[1], "gender");
+        ASSERT_NE(gender, nullptr);
+        expect_child(*gender, 305, "gender");
+    }
+    { // root-only path: both sides are built and the value carries all three fields
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path({"m"})}, &schema);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(column.children.size(), 2);
+        ASSERT_EQ(column.children[1].children.size(), 3);
+    }
+
+    for (const auto& invalid_path : std::vector<std::vector<std::string>> {
+                 {"m", "OFFSET"}, {"m", "ENTRY"}, {"m", "VALUES", "missing"}}) {
+        auto column = root_column(300, "m", map_type);
+        auto status = AccessPathParser::build_nested_children(
+                &column, std::vector<TColumnAccessPath> {data_access_path(invalid_path)}, &schema);
+        EXPECT_FALSE(status.ok()) << invalid_path.back(); // last token labels the failing path
+    }
+}
+
+} // namespace doris
diff --git a/be/test/exec/scan/file_scanner_v2_test.cpp b/be/test/exec/scan/file_scanner_v2_test.cpp
new file mode 100644
index 00000000000000..436a18c66decf4
--- /dev/null
+++ b/be/test/exec/scan/file_scanner_v2_test.cpp
@@ -0,0 +1,347 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/scan/file_scanner_v2.h"
+
+#include <gen_cpp/PlanNodes_types.h>
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "exec/scan/split_source_connector.h"
+#include "exprs/runtime_filter_expr.h"
+#include "exprs/vdirect_in_predicate.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/expr/cast.h"
+
+namespace doris {
+namespace {
+
+TFileRangeDesc range_with_format(std::string table_format, TFileFormatType::type format_type) {
+    TFileRangeDesc result;
+    if (!table_format.empty()) { // an empty name leaves table_format_params unset
+        TTableFormatFileDesc format_params;
+        format_params.__set_table_format_type(std::move(table_format));
+        result.__set_table_format_params(std::move(format_params));
+    }
+    result.__set_format_type(format_type);
+    return result;
+}
+
+TFileRangeDesc hudi_range_with_delta_logs() {
+    THudiFileDesc hudi_desc;
+    hudi_desc.__set_delta_logs({"delta.log"}); // a single delta log entry
+    TFileRangeDesc hudi_range = range_with_format("hudi", TFileFormatType::FORMAT_PARQUET);
+    hudi_range.table_format_params.__set_hudi_params(std::move(hudi_desc));
+    return hudi_range;
+}
+
+TScanRangeParams scan_range_param(const TFileRangeDesc& range) {
+    TScanRangeParams wrapped; // carries exactly one file range: a copy of `range`
+    wrapped.scan_range.ext_scan_range.file_scan_range.ranges.emplace_back(range);
+    return wrapped;
+}
+
+VExprSPtr slot_ref(int slot_id, int column_id, DataTypePtr type, const std::string& name) {
+    return VSlotRef::create_shared(slot_id, column_id, /*column_uniq_id=*/-1, std::move(type), name);
+}
+
+TExprNode bool_in_pred_node() {
+    TScalarType scalar;
+    scalar.__set_type(TPrimitiveType::BOOLEAN);
+    TTypeNode type_node;
+    type_node.__set_type(TTypeNodeType::SCALAR);
+    type_node.__set_scalar_type(scalar);
+    TTypeDesc type_desc;
+    type_desc.types.push_back(type_node);
+
+    TExprNode in_pred; // IN predicate (not NOT IN), non-nullable, BOOLEAN result type
+    in_pred.__set_node_type(TExprNodeType::IN_PRED);
+    in_pred.__set_opcode(TExprOpcode::FILTER_IN);
+    in_pred.__set_is_nullable(false);
+    in_pred.in_predicate.__set_is_not_in(false);
+    in_pred.__set_type(type_desc);
+    return in_pred;
+}
+
+} // namespace
+
+// Scenario: FileScannerV2::is_supported should honor table format, scan params format, and the
+// optional per-range file format override as a single matrix.
+TEST(FileScannerV2Test, SupportedFormatMatrix) {
+    struct Case {
+        std::string table_format;
+        TFileFormatType::type params_format;
+        std::optional<TFileFormatType::type> range_format; // nullopt: no per-range override
+        bool expected;
+    };
+
+    const std::vector<Case> cases {
+            {"", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"tvf", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"iceberg", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"paimon", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"hudi", TFileFormatType::FORMAT_PARQUET, std::nullopt, true},
+            {"jdbc", TFileFormatType::FORMAT_PARQUET, std::nullopt, false},
+            {"", TFileFormatType::FORMAT_JNI, std::nullopt, false},
+            {"hive", TFileFormatType::FORMAT_ORC, std::nullopt, false},
+            {"jdbc", TFileFormatType::FORMAT_JNI, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_JNI, std::nullopt, false},
+            {"", TFileFormatType::FORMAT_CSV_PLAIN, std::nullopt, true},
+            {"tvf", TFileFormatType::FORMAT_CSV_GZ, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_BZ2, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_LZ4FRAME, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_LZ4BLOCK, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_LZOP, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_DEFLATE, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_CSV_SNAPPYBLOCK, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_PROTO, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_TEXT, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_JSON, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_ORC, false},
+            {"hive", TFileFormatType::FORMAT_ORC, TFileFormatType::FORMAT_PARQUET, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_CSV_PLAIN, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_TEXT, true},
+            {"hive", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_JSON, true},
+            {"tvf", TFileFormatType::FORMAT_PARQUET, TFileFormatType::FORMAT_NATIVE, true},
+            {"remote_doris", TFileFormatType::FORMAT_ARROW, std::nullopt, true},
+            {"hive", TFileFormatType::FORMAT_ARROW, std::nullopt, false},
+            {"", TFileFormatType::FORMAT_ARROW, std::nullopt, false},
+            {"", TFileFormatType::FORMAT_WAL, std::nullopt, false},
+    };
+
+    for (const auto& test_case : cases) {
+        TFileScanRangeParams params;
+        params.__set_format_type(test_case.params_format);
+        auto range = range_with_format(test_case.table_format,
+                                       test_case.range_format.value_or(test_case.params_format));
+        // Rows without an override clear the isset flag that range_with_format() always sets.
+        range.__isset.format_type = test_case.range_format.has_value();
+        EXPECT_EQ(FileScannerV2::is_supported(params, range), test_case.expected)
+                << "table_format=" << test_case.table_format
+                << ", params_format=" << static_cast<int>(test_case.params_format)
+                << ", range_format(-1=unset)="
+                << (test_case.range_format ? static_cast<int>(*test_case.range_format) : -1);
+    }
+
+    TFileScanRangeParams params; // a hudi range that carries delta logs must be rejected
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    EXPECT_FALSE(FileScannerV2::is_supported(params, hudi_range_with_delta_logs()));
+}
+
+// Scenario: SplitSourceConnector should route to FileScannerV2 only when every scan range in the
+// source is supported; one unsupported table format or file format must make the match fail.
+TEST(FileScannerV2Test, SplitSourceAllScanRangesMatchRequiresEveryRangeSupported) {
+    TFileScanRangeParams params; // shared by every connector built below
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+
+    const auto supported = range_with_format("hive", TFileFormatType::FORMAT_PARQUET);
+    const auto unsupported_table = range_with_format("lakesoul", TFileFormatType::FORMAT_PARQUET);
+    const auto unsupported_format = range_with_format("hive", TFileFormatType::FORMAT_ORC);
+
+    LocalSplitSourceConnector all_supported(
+            {scan_range_param(supported),
+             scan_range_param(range_with_format("iceberg", TFileFormatType::FORMAT_PARQUET))},
+            1);
+    EXPECT_TRUE(all_supported.all_scan_ranges_match(params, FileScannerV2::is_supported));
+
+    LocalSplitSourceConnector hudi_supported( // this hudi range carries no delta logs
+            {scan_range_param(supported),
+             scan_range_param(range_with_format("hudi", TFileFormatType::FORMAT_PARQUET))},
+            1);
+    EXPECT_TRUE(hudi_supported.all_scan_ranges_match(params, FileScannerV2::is_supported));
+
+    LocalSplitSourceConnector table_mismatch( // second range has an unsupported table format
+            {scan_range_param(supported), scan_range_param(unsupported_table)}, 1);
+    EXPECT_FALSE(table_mismatch.all_scan_ranges_match(params, FileScannerV2::is_supported));
+
+    LocalSplitSourceConnector format_mismatch( // second range overrides the format with ORC
+            {scan_range_param(supported), scan_range_param(unsupported_format)}, 1);
+    EXPECT_FALSE(format_mismatch.all_scan_ranges_match(params, FileScannerV2::is_supported));
+}
+
+// Scenario: FileScannerV2 converts only the file formats implemented by format_v2 readers and
+// rejects everything else before TableReader::init sees an unsupported FileFormat.
+TEST(FileScannerV2Test, FileFormatConversionMatrix) {
+    struct Case {
+        TFileFormatType::type input;
+        std::optional<format::FileFormat> expected; // nullopt: the conversion must fail
+    };
+    const std::vector<Case> cases {
+            {TFileFormatType::FORMAT_PARQUET, format::FileFormat::PARQUET},
+            {TFileFormatType::FORMAT_JNI, format::FileFormat::JNI},
+            {TFileFormatType::FORMAT_CSV_PLAIN, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_GZ, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_BZ2, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_LZ4FRAME, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_LZ4BLOCK, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_LZOP, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_DEFLATE, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_CSV_SNAPPYBLOCK, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_PROTO, format::FileFormat::CSV},
+            {TFileFormatType::FORMAT_TEXT, format::FileFormat::TEXT},
+            {TFileFormatType::FORMAT_JSON, format::FileFormat::JSON},
+            {TFileFormatType::FORMAT_NATIVE, format::FileFormat::NATIVE},
+            {TFileFormatType::FORMAT_ARROW, format::FileFormat::ARROW},
+            {TFileFormatType::FORMAT_ORC, std::nullopt},
+    };
+
+    for (const auto& test_case : cases) {
+        // Tag every failure of this row with its input; checks are non-fatal so all rows run.
+        SCOPED_TRACE("input format=" + std::to_string(static_cast<int>(test_case.input)));
+        format::FileFormat file_format = format::FileFormat::PARQUET;
+        const auto status = FileScannerV2::TEST_to_file_format(test_case.input, &file_format);
+        EXPECT_EQ(status.ok(), test_case.expected.has_value()) << status;
+        if (status.ok() && test_case.expected.has_value()) {
+            EXPECT_EQ(file_format, *test_case.expected);
+        }
+    }
+}
+
+// Scenario: partition slots are identified from the explicit FE category when present, otherwise
+// from the legacy is_file_slot flag. Scanner-generated rowid columns must never be treated as
+// partition columns even if FE marks them as non-file slots.
+TEST(FileScannerV2Test, PartitionSlotClassificationMatrix) {
+    TFileScanSlotInfo legacy_partition; // no category set: is_file_slot=false decides
+    legacy_partition.__set_is_file_slot(false);
+    EXPECT_TRUE(FileScannerV2::TEST_is_partition_slot(legacy_partition, "dt"));
+
+    TFileScanSlotInfo legacy_file; // no category set: is_file_slot=true decides
+    legacy_file.__set_is_file_slot(true);
+    EXPECT_FALSE(FileScannerV2::TEST_is_partition_slot(legacy_file, "value"));
+
+    TFileScanSlotInfo categorized_partition; // PARTITION_KEY overrides is_file_slot=true
+    categorized_partition.__set_is_file_slot(true);
+    categorized_partition.__set_category(TColumnCategory::PARTITION_KEY);
+    EXPECT_TRUE(FileScannerV2::TEST_is_partition_slot(categorized_partition, "p"));
+
+    TFileScanSlotInfo categorized_regular; // REGULAR overrides is_file_slot=false
+    categorized_regular.__set_is_file_slot(false);
+    categorized_regular.__set_category(TColumnCategory::REGULAR);
+    EXPECT_FALSE(FileScannerV2::TEST_is_partition_slot(categorized_regular, "regular_col"));
+
+    EXPECT_FALSE(
+            FileScannerV2::TEST_is_partition_slot(legacy_partition, BeConsts::GLOBAL_ROWID_COL));
+    EXPECT_FALSE(
+            FileScannerV2::TEST_is_partition_slot(legacy_partition, BeConsts::ICEBERG_ROWID_COL));
+}
+
+// Scenario: data-file slots are the complement of partition/default/synthesized columns for
+// formats without embedded schema. FE may send either the new category or the old is_file_slot
+// flag, and scanner-generated rowid columns must never be passed to a physical file reader.
+TEST(FileScannerV2Test, DataFileSlotClassificationMatrix) {
+    TFileScanSlotInfo legacy_file; // no category set: is_file_slot=true decides
+    legacy_file.__set_is_file_slot(true);
+    EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(legacy_file, "value"));
+
+    TFileScanSlotInfo legacy_partition; // no category set: is_file_slot=false decides
+    legacy_partition.__set_is_file_slot(false);
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_partition, "dt"));
+
+    TFileScanSlotInfo categorized_regular; // REGULAR overrides is_file_slot=false
+    categorized_regular.__set_is_file_slot(false);
+    categorized_regular.__set_category(TColumnCategory::REGULAR);
+    EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(categorized_regular, "regular_col"));
+
+    TFileScanSlotInfo categorized_generated; // GENERATED also counts as a data-file slot
+    categorized_generated.__set_is_file_slot(false);
+    categorized_generated.__set_category(TColumnCategory::GENERATED);
+    EXPECT_TRUE(FileScannerV2::TEST_is_data_file_slot(categorized_generated, "generated_col"));
+
+    TFileScanSlotInfo categorized_partition; // PARTITION_KEY overrides is_file_slot=true
+    categorized_partition.__set_is_file_slot(true);
+    categorized_partition.__set_category(TColumnCategory::PARTITION_KEY);
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(categorized_partition, "p"));
+
+    TFileScanSlotInfo categorized_synthesized; // SYNTHESIZED overrides is_file_slot=true
+    categorized_synthesized.__set_is_file_slot(true);
+    categorized_synthesized.__set_category(TColumnCategory::SYNTHESIZED);
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(categorized_synthesized, "virtual_col"));
+
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_file, BeConsts::GLOBAL_ROWID_COL));
+    EXPECT_FALSE(FileScannerV2::TEST_is_data_file_slot(legacy_file, BeConsts::ICEBERG_ROWID_COL));
+}
+
+// Scenario: table conjuncts are cloned into global-index space before they are handed to
+// TableReader. Explicit slot-id mappings use the required_slots order; missing mappings fall back
+// to the slot id itself for legacy descriptors.
+TEST(FileScannerV2Test, RewriteSlotRefsToGlobalIndexMatrix) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    { // explicit mapping: slot 42 becomes global index 3; the column name is kept
+        auto expr = slot_ref(42, 99, int_type, "value");
+        const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index(
+                &expr, {{42, format::GlobalIndex(3)}});
+        ASSERT_TRUE(status.ok()) << status;
+        const auto* rewritten = assert_cast<const VSlotRef*>(expr.get());
+        EXPECT_EQ(rewritten->slot_id(), 3);
+        EXPECT_EQ(rewritten->column_id(), 3);
+        EXPECT_EQ(rewritten->column_name(), "value");
+    }
+    { // empty mapping: slot id 7 is reused for both slot_id and column_id
+        auto expr = slot_ref(7, 99, int_type, "legacy_value");
+        const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index(&expr, {});
+        ASSERT_TRUE(status.ok()) << status;
+        const auto* rewritten = assert_cast<const VSlotRef*>(expr.get());
+        EXPECT_EQ(rewritten->slot_id(), 7);
+        EXPECT_EQ(rewritten->column_id(), 7);
+        EXPECT_EQ(rewritten->column_name(), "legacy_value");
+    }
+    { // a slot ref nested under a Cast expression is rewritten as well
+        auto cast_expr = format::Cast::create_shared(int_type);
+        cast_expr->add_child(slot_ref(9, 9, int_type, "nested_value"));
+        VExprSPtr expr = cast_expr;
+        const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index(
+                &expr, {{9, format::GlobalIndex(1)}});
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(expr->get_num_children(), 1);
+        const auto* rewritten_child = assert_cast<const VSlotRef*>(expr->children()[0].get());
+        EXPECT_EQ(rewritten_child->slot_id(), 1);
+        EXPECT_EQ(rewritten_child->column_id(), 1);
+        EXPECT_EQ(rewritten_child->column_name(), "nested_value");
+    }
+    { // the slot ref inside a RuntimeFilterExpr's impl tree must be rewritten too
+        const auto node = bool_in_pred_node();
+        auto impl = VDirectInPredicate::create_shared(node, nullptr);
+        impl->add_child(slot_ref(11, 11, int_type, "rf_value"));
+        VExprSPtr expr = RuntimeFilterExpr::create_shared(node, impl, 0.4, false, 7);
+        const auto status = FileScannerV2::TEST_rewrite_slot_refs_to_global_index(
+                &expr, {{11, format::GlobalIndex(2)}});
+        ASSERT_TRUE(status.ok()) << status;
+
+        auto* runtime_filter = assert_cast<RuntimeFilterExpr*>(expr.get());
+        auto rewritten_impl = runtime_filter->get_impl(); // checks go through the impl tree
+        ASSERT_NE(rewritten_impl, nullptr);
+        ASSERT_EQ(rewritten_impl->get_num_children(), 1);
+        const auto* rewritten_child =
+                assert_cast<const VSlotRef*>(rewritten_impl->children()[0].get());
+        EXPECT_EQ(rewritten_child->slot_id(), 2);
+        EXPECT_EQ(rewritten_child->column_id(), 2);
+        EXPECT_EQ(rewritten_child->column_name(), "rf_value");
+    }
+}
+
+} // namespace doris
diff --git a/be/test/exec/scan/vfile_scanner_exception_test.cpp b/be/test/exec/scan/vfile_scanner_exception_test.cpp
index 64b17a6a86b87b..70b3d07f8eff48 100644
--- a/be/test/exec/scan/vfile_scanner_exception_test.cpp
+++ b/be/test/exec/scan/vfile_scanner_exception_test.cpp
@@ -18,13 +18,19 @@
 #include <gen_cpp/PlanNodes_types.h>
 #include <gtest/gtest.h>
 
+#include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 
 #include "common/object_pool.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
 #include "cpp/sync_point.h"
 #include "exec/operator/file_scan_operator.h"
 #include "exec/scan/file_scanner.h"
+#include "exec/scan/split_source_connector.h"
+#include "format_v2/table/hive_reader.h"
 #include "io/fs/local_file_system.h"
 #include "load/group_commit/wal/wal_manager.h"
 #include "runtime/cluster_info.h"
@@ -34,7 +40,6 @@
 #include "runtime/user_function_cache.h"
 
 namespace doris {
-
 class TestSplitSourceConnectorStub : public SplitSourceConnector {
 private:
     std::mutex _range_lock;
@@ -336,4 +341,112 @@ TEST_F(VfileScannerExceptionTest, process_late_arrival_conjuncts_retain) {
     WARN_IF_ERROR(scanner->close(&_runtime_state), "fail to close scanner");
 }
 
+TEST(HiveReaderPositionMappingTest, PositionMappingUsesColumnIdxsForFileSlots) {
+    TQueryOptions query_options; // Scenario: two file slots annotated with column_idxs {2, 0}.
+    query_options.hive_parquet_use_column_names = false;
+    RuntimeState runtime_state(query_options, TQueryGlobals());
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    params.__set_column_idxs({2, 0});
+    format::ProjectedColumnBuildContext context {
+            .scan_params = &params,
+            .runtime_state = &runtime_state,
+    };
+    format::hive::HiveReader reader;
+
+    TFileScanSlotInfo id_slot;
+    id_slot.__set_is_file_slot(true);
+    format::ColumnDefinition id_column {
+            .identifier = Field::create_field<TYPE_STRING>("id"),
+            .name = "id",
+            .type = std::make_shared<DataTypeInt32>(),
+    };
+
+    TFileScanSlotInfo name_slot;
+    name_slot.__set_is_file_slot(true);
+    format::ColumnDefinition name_column {
+            .identifier = Field::create_field<TYPE_STRING>("name"),
+            .name = "name",
+            .type = std::make_shared<DataTypeString>(),
+    };
+
+    ASSERT_TRUE(reader.annotate_projected_column(id_slot, &context, &id_column).ok());
+    ASSERT_TRUE(id_column.has_identifier_field_id());
+    EXPECT_EQ(id_column.get_identifier_position(), 2); // first column_idxs entry
+    EXPECT_EQ(context.next_file_column_idx, 1);
+
+    ASSERT_TRUE(reader.annotate_projected_column(name_slot, &context, &name_column).ok());
+    ASSERT_TRUE(name_column.has_identifier_field_id());
+    EXPECT_EQ(name_column.get_identifier_position(), 0); // second column_idxs entry
+    EXPECT_EQ(context.next_file_column_idx, 2);
+    ASSERT_TRUE(reader.validate_projected_columns(context).ok());
+}
+
+TEST(HiveReaderPositionMappingTest, PositionMappingDoesNotConsumePartitionSlots) {
+    TQueryOptions query_options; // Scenario: a partition slot is annotated before the file slot.
+    query_options.hive_parquet_use_column_names = false;
+    RuntimeState runtime_state(query_options, TQueryGlobals());
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    params.__set_column_idxs({3});
+    format::ProjectedColumnBuildContext context {
+            .scan_params = &params,
+            .runtime_state = &runtime_state,
+    };
+    format::hive::HiveReader reader;
+
+    TFileScanSlotInfo partition_slot;
+    partition_slot.__set_is_file_slot(false);
+    partition_slot.__set_category(TColumnCategory::PARTITION_KEY);
+    format::ColumnDefinition partition_column {
+            .identifier = Field::create_field<TYPE_STRING>("year"),
+            .name = "year",
+            .type = std::make_shared<DataTypeInt32>(),
+    };
+
+    TFileScanSlotInfo value_slot;
+    value_slot.__set_is_file_slot(true);
+    format::ColumnDefinition value_column {
+            .identifier = Field::create_field<TYPE_STRING>("value"),
+            .name = "value",
+            .type = std::make_shared<DataTypeInt32>(),
+    };
+
+    ASSERT_TRUE(reader.annotate_projected_column(partition_slot, &context, &partition_column).ok());
+    ASSERT_TRUE(partition_column.has_identifier_name());
+    EXPECT_EQ(partition_column.get_identifier_name(), "year");
+    EXPECT_EQ(context.next_file_column_idx, 0); // partition slot did not advance the index
+
+    ASSERT_TRUE(reader.annotate_projected_column(value_slot, &context, &value_column).ok());
+    ASSERT_TRUE(value_column.has_identifier_field_id());
+    EXPECT_EQ(value_column.get_identifier_position(), 3); // the only column_idxs entry
+    EXPECT_EQ(context.next_file_column_idx, 1);
+    ASSERT_TRUE(reader.validate_projected_columns(context).ok());
+}
+
+TEST(HiveReaderPositionMappingTest, PositionMappingFailsWhenColumnIdxsMissing) {
+    TQueryOptions query_options; // Scenario: column_idxs is never set on the scan params.
+    query_options.hive_parquet_use_column_names = false;
+    RuntimeState runtime_state(query_options, TQueryGlobals());
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    format::ProjectedColumnBuildContext context {
+            .scan_params = &params,
+            .runtime_state = &runtime_state,
+    };
+    format::hive::HiveReader reader;
+
+    TFileScanSlotInfo value_slot;
+    value_slot.__set_is_file_slot(true);
+    format::ColumnDefinition value_column {
+            .identifier = Field::create_field<TYPE_STRING>("value"),
+            .name = "value",
+            .type = std::make_shared<DataTypeInt32>(),
+    };
+
+    auto status = reader.annotate_projected_column(value_slot, &context, &value_column);
+    EXPECT_FALSE(status.ok()); // the test expects annotation to fail here
+    EXPECT_EQ(context.next_file_column_idx, 0);
+}
+
 } // namespace doris
diff --git a/be/test/format_v2/column_mapper_test.cpp b/be/test/format_v2/column_mapper_test.cpp
new file mode 100644
index 00000000000000..d870f85dd7dc10
--- /dev/null
+++ b/be/test/format_v2/column_mapper_test.cpp
@@ -0,0 +1,4140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/column_mapper.h"
+
+#include <gtest/gtest.h>
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_decimal.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/data_type_timestamptz.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vin_predicate.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/column_mapper_nested.h"
+#include "format_v2/expr/cast.h"
+#include "format_v2/schema_projection.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Exprs_types.h"
+#include "runtime/descriptors.h"
+#include "storage/predicate/predicate_creator.h"
+#include "testutil/column_helper.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format {
+namespace {
+
+DataTypePtr i32() { // Test shorthand: a new DataTypeInt32.
+    return std::make_shared<DataTypeInt32>();
+}
+
+DataTypePtr i64() { // Test shorthand: a new DataTypeInt64.
+    return std::make_shared<DataTypeInt64>();
+}
+
+DataTypePtr f32() { // Test shorthand: a new DataTypeFloat32.
+    return std::make_shared<DataTypeFloat32>();
+}
+
+DataTypePtr f64() { // Test shorthand: a new DataTypeFloat64.
+    return std::make_shared<DataTypeFloat64>();
+}
+
+DataTypePtr dec32(uint32_t precision, uint32_t scale) { // Shorthand: a new DataTypeDecimal32.
+    return std::make_shared<DataTypeDecimal32>(precision, scale);
+}
+
+DataTypePtr str() { // Test shorthand: a new DataTypeString.
+    return std::make_shared<DataTypeString>();
+}
+
+DataTypePtr timestamptz(uint32_t scale) { // Test shorthand: a new DataTypeTimeStampTz.
+    return std::make_shared<DataTypeTimeStampTz>(scale);
+}
+
+DataTypePtr u8() { // Test shorthand: a new DataTypeUInt8.
+    return std::make_shared<DataTypeUInt8>();
+}
+
+ColumnDefinition field_id_col(const std::string& name, int32_t field_id, DataTypePtr type,
+                              int32_t local_id = -1) {
+    ColumnDefinition result; // identified by a TYPE_INT field id instead of by its name
+    result.name = name;
+    result.type = std::move(type);
+    result.local_id = local_id;
+    result.identifier = Field::create_field<TYPE_INT>(field_id);
+    return result;
+}
+
+ColumnDefinition name_col(const std::string& name, DataTypePtr type, int32_t local_id = -1) {
+    ColumnDefinition result; // the column name doubles as its TYPE_STRING identifier
+    result.name = name;
+    result.type = std::move(type);
+    result.local_id = local_id;
+    result.identifier = Field::create_field<TYPE_STRING>(name);
+    return result;
+}
+
+ColumnDefinition name_id_col(const std::string& name, const std::string& identifier,
+                             DataTypePtr type, int32_t local_id = -1) {
+    ColumnDefinition result = name_col(name, std::move(type), local_id);
+    result.identifier = Field::create_field<TYPE_STRING>(identifier); // replaces the name-based id
+    return result;
+}
+
+ColumnDefinition position_col(const std::string& name, int32_t file_position, DataTypePtr type) {
+    return field_id_col(name, file_position, std::move(type)); // position stored as TYPE_INT id
+}
+
+ColumnDefinition struct_col(const std::string& name, int32_t field_id,
+                            std::vector<ColumnDefinition> children, int32_t local_id = -1) {
+    Strings field_names; // the struct type is assembled from the children's names and types
+    DataTypes field_types;
+    field_names.reserve(children.size());
+    field_types.reserve(children.size());
+    for (const auto& member : children) {
+        field_names.push_back(member.name);
+        field_types.push_back(member.type);
+    }
+    DataTypePtr struct_type = std::make_shared<DataTypeStruct>(field_types, field_names);
+    auto result = field_id_col(name, field_id, std::move(struct_type), local_id);
+    result.children = std::move(children);
+    return result;
+}
+
+ColumnDefinition struct_name_col(const std::string& name, std::vector<ColumnDefinition> children,
+                                 int32_t local_id = -1) {
+    auto result = struct_col(name, -1, std::move(children), local_id); // -1: placeholder field id
+    result.identifier = Field::create_field<TYPE_STRING>(name);
+    return result;
+}
+
+ColumnDefinition array_col(const std::string& name, int32_t field_id, ColumnDefinition element,
+                           int32_t local_id = -1) {
+    DataTypePtr array_type = std::make_shared<DataTypeArray>(element.type);
+    auto result = field_id_col(name, field_id, std::move(array_type), local_id);
+    result.children = {std::move(element)}; // the element definition is the only child
+    return result;
+}
+
+ColumnDefinition map_col(const std::string& name, int32_t field_id,
+                         std::vector<ColumnDefinition> children, const DataTypePtr& key_type,
+                         const DataTypePtr& value_type, int32_t local_id = -1) {
+    DataTypePtr map_type = std::make_shared<DataTypeMap>(key_type, value_type);
+    auto result = field_id_col(name, field_id, std::move(map_type), local_id);
+    result.children = std::move(children); // children are stored exactly as the caller passed them
+    return result;
+}
+
+void set_name_identifiers(ColumnDefinition* column, int32_t local_id) {
+    DORIS_CHECK(column != nullptr);
+    column->local_id = local_id;
+    column->identifier = Field::create_field<TYPE_STRING>(column->name);
+    for (size_t pos = 0; pos < column->children.size(); ++pos) { // child local id = its position
+        set_name_identifiers(&column->children[pos], static_cast<int32_t>(pos));
+    }
+}
+
+std::vector<int32_t> projection_ids(const std::vector<LocalColumnIndex>& projections) {
+    std::vector<int32_t> local_ids; // one local_id() per projection, in input order
+    local_ids.reserve(projections.size());
+    for (const auto& entry : projections) {
+        local_ids.push_back(entry.local_id());
+    }
+    return local_ids;
+}
+
+std::vector<std::string> target_names(const FileStructPredicateTarget* target) {
+    std::vector<std::string> chain; // file_child_name of every node, following child links
+    for (const auto* node = target; node != nullptr; node = node->child.get()) {
+        chain.push_back(node->file_child_name);
+    }
+    return chain;
+}
+
+TEST(ColumnMapperDebugTest, CoversDebugStringEnumAndNestedBranches) {
+    ColumnDefinition child = field_id_col("child", 2, str(), 3); // becomes root's only child
+    child.name_mapping = {"legacy_child"};
+
+    ColumnDefinition column = field_id_col(
+            "root", 1,
+            std::make_shared<DataTypeStruct>(DataTypes {child.type}, Strings {child.name}));
+    column.name_mapping = {"legacy_root"};
+    column.children = {child};
+    column.default_expr = VExprContext::create_shared(VLiteral::create_shared(
+            std::make_shared<DataTypeString>(), Field::create_field<TYPE_STRING>("fallback")));
+    column.is_partition_key = true; // each field set above is searched for in the dump below
+
+    const auto column_debug = column.debug_string();
+    EXPECT_NE(column_debug.find("ColumnDefinition{name=root"), std::string::npos);
+    EXPECT_NE(column_debug.find("name_mapping=[legacy_root]"), std::string::npos);
+    EXPECT_NE(column_debug.find("children=[ColumnDefinition{name=child"), std::string::npos);
+    EXPECT_NE(column_debug.find("has_default_expr=1"), std::string::npos);
+    EXPECT_NE(column_debug.find("is_partition_key=1"), std::string::npos);
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(4);
+    projection.children.push_back(LocalColumnIndex::local(7));
+    EXPECT_NE(projection.debug_string().find("children=[LocalColumnIndex{index=7"),
+              std::string::npos);
+
+    const std::vector<TableColumnMappingMode> modes {TableColumnMappingMode::BY_FIELD_ID,
+                                                     TableColumnMappingMode::BY_NAME,
+                                                     TableColumnMappingMode::BY_INDEX};
+    const std::vector<std::string> mode_names {"BY_FIELD_ID", "BY_NAME", "BY_INDEX"};
+    for (size_t idx = 0; idx < modes.size(); ++idx) { // each mode name must appear in the dump
+        TableColumnMapperOptions options {.mode = modes[idx]};
+        EXPECT_NE(options.debug_string().find(mode_names[idx]), std::string::npos);
+    }
+
+    const std::vector<FilterConversionType> conversions {
+            FilterConversionType::COPY_DIRECTLY, FilterConversionType::CAST_FILTER,
+            FilterConversionType::READER_EXPRESSION, FilterConversionType::FINALIZE_ONLY,
+            FilterConversionType::CONSTANT};
+    const std::vector<std::string> conversion_names {
+            "COPY_DIRECTLY", "CAST_FILTER", "READER_EXPRESSION", "FINALIZE_ONLY", "CONSTANT"};
+    for (size_t idx = 0; idx < conversions.size(); ++idx) { // one mapping per conversion type
+        ColumnMapping mapping;
+        mapping.global_index = GlobalIndex(idx);
+        mapping.table_column_name = "table_col";
+        mapping.file_local_id = 8;
+        mapping.constant_index = ConstantIndex(9);
+        mapping.file_column_name = "file_col";
+        mapping.original_file_type = str();
+        mapping.original_file_children = {child};
+        mapping.file_type = str();
+        mapping.table_type = str();
+        mapping.is_trivial = idx % 2 == 0; // alternates per iteration
+        mapping.filter_conversion = conversions[idx];
+        mapping.virtual_column_type = static_cast<TableVirtualColumnType>(
+                idx % (TableVirtualColumnType::ICEBERG_ROWID + 1)); // wraps into 0..ICEBERG_ROWID
+        mapping.default_expr = column.default_expr;
+
+        ColumnMapping child_mapping;
+        child_mapping.global_index = GlobalIndex(10 + idx);
+        child_mapping.table_column_name = "child_col";
+        child_mapping.file_column_name = "child_file";
+        child_mapping.file_type = i32();
+        child_mapping.table_type = i32();
+        mapping.child_mappings.push_back(std::move(child_mapping));
+
+        const auto debug = mapping.debug_string();
+        EXPECT_NE(debug.find("file_local_id=8"), std::string::npos);
+        EXPECT_NE(debug.find("constant_index=9"), std::string::npos);
+        EXPECT_NE(debug.find(conversion_names[idx]), std::string::npos);
+        EXPECT_NE(debug.find("child_mappings=[ColumnMapping{global_index="), std::string::npos);
+        EXPECT_NE(debug.find("has_default_expr=1"), std::string::npos);
+    }
+}
+
+void expect_mapping(const ColumnMapping& mapping, size_t global_index,
+                    const std::string& table_name, int32_t file_local_id,
+                    const std::string& file_name, const DataTypePtr& file_type,
+                    const DataTypePtr& table_type) { // Checks a mapping backed by a file column.
+    EXPECT_EQ(mapping.global_index, GlobalIndex(global_index));
+    EXPECT_EQ(mapping.table_column_name, table_name);
+    ASSERT_TRUE(mapping.file_local_id.has_value()); // required before the dereference below
+    EXPECT_EQ(*mapping.file_local_id, file_local_id);
+    EXPECT_EQ(mapping.file_column_name, file_name);
+    ASSERT_NE(mapping.file_type, nullptr);
+    ASSERT_NE(mapping.table_type, nullptr); // both type pointers are dereferenced next
+    EXPECT_TRUE(mapping.file_type->equals(*file_type));
+    EXPECT_TRUE(mapping.table_type->equals(*table_type));
+}
+
+void expect_constant(const TableColumnMapper& mapper, const ColumnMapping& mapping,
+                     size_t global_index, const DataTypePtr& table_type) { // Constant mapping.
+    EXPECT_FALSE(mapping.file_local_id.has_value());
+    ASSERT_TRUE(mapping.constant_index.has_value()); // dereferenced on the following lines
+    ASSERT_LT(mapping.constant_index->value(), mapper.constant_map().size());
+    const auto& entry = mapper.constant_map().get(*mapping.constant_index);
+    EXPECT_EQ(entry.global_index, GlobalIndex(global_index));
+    EXPECT_TRUE(entry.type->equals(*table_type));
+    EXPECT_EQ(entry.expr, mapping.default_expr);
+}
+
+void expect_missing(const ColumnMapping& mapping) { // No file id, constant or virtual type.
+    EXPECT_FALSE(mapping.file_local_id.has_value());
+    EXPECT_FALSE(mapping.constant_index.has_value());
+    EXPECT_EQ(mapping.virtual_column_type, TableVirtualColumnType::INVALID);
+}
+
+class TestFunctionExpr final : public VExpr { // Stub expression: carries a name, cannot execute.
+public:
+    TestFunctionExpr(std::string function_name, DataTypePtr data_type,
+                     TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL,
+                     TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE)
+            : VExpr(std::move(data_type), false), _expr_name(std::move(function_name)) {
+        set_node_type(node_type);
+        _opcode = opcode;
+        TFunctionName fn_name;
+        fn_name.__set_function_name(_expr_name);
+        _fn.__set_name(fn_name); // the function name is also stored in _fn
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override { // copies this node only
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr =
+                std::make_shared<TestFunctionExpr>(_expr_name, data_type(), node_type(), _opcode);
+        return Status::OK();
+    }
+
+    Status execute_column_impl(VExprContext*, const Block*, const Selector*, size_t,
+                               ColumnPtr&) const override { // always NotSupported
+        return Status::NotSupported("TestFunctionExpr is only used for ColumnMapper analysis");
+    }
+
+private:
+    std::string _expr_name;
+};
+
+VExprSPtr table_slot(int slot_id, int column_id, DataTypePtr type, const std::string& name) {
+    return VSlotRef::create_shared(slot_id, column_id, -1, std::move(type), name); // test slot
+}
+
+VExprSPtr literal(DataTypePtr type, Field value) { // Wraps value in a VLiteral of the given type.
+    return VLiteral::create_shared(std::move(type), std::move(value));
+}
+
+VExprSPtr struct_element(const VExprSPtr& parent, DataTypePtr child_type,
+                         const std::string& child_name) { // struct_element(parent, "child_name")
+    auto expr = std::make_shared<TestFunctionExpr>("struct_element", std::move(child_type));
+    expr->add_child(parent);
+    expr->add_child(literal(str(), Field::create_field<TYPE_STRING>(child_name)));
+    return expr;
+}
+
+VExprSPtr element_at(const VExprSPtr& parent, DataTypePtr child_type,
+                     const std::string& child_name) {
+    auto call = std::make_shared<TestFunctionExpr>("element_at", std::move(child_type));
+    call->add_child(parent); // operands: the container first, then a string literal selector
+    call->add_child(literal(str(), Field::create_field<TYPE_STRING>(child_name)));
+    return call;
+}
+
+VExprSPtr array_element_at(const VExprSPtr& parent, DataTypePtr child_type, int64_t ordinal) {
+    auto call = std::make_shared<TestFunctionExpr>("element_at", std::move(child_type));
+    call->add_child(parent); // operands: the container first, then a BIGINT literal ordinal
+    call->add_child(literal(i64(), Field::create_field<TYPE_BIGINT>(ordinal)));
+    return call;
+}
+
+VExprSPtr map_values(const VExprSPtr& parent, DataTypePtr value_type) {
+    auto call = std::make_shared<TestFunctionExpr>(
+            "map_values", std::make_shared<DataTypeArray>(std::move(value_type)));
+    call->add_child(parent); // the node's result type above is array<value_type>
+    return call;
+}
+
+VExprSPtr map_keys(const VExprSPtr& parent, DataTypePtr key_type) {
+    auto call = std::make_shared<TestFunctionExpr>(
+            "map_keys", std::make_shared<DataTypeArray>(std::move(key_type)));
+    call->add_child(parent); // the node's result type above is array<key_type>
+    return call;
+}
+
+VExprSPtr array_contains(const VExprSPtr& array, const VExprSPtr& value) {
+    auto call = std::make_shared<TestFunctionExpr>("array_contains", u8());
+    call->add_child(array); // operands: the array first, then the value searched for
+    call->add_child(value);
+    return call;
+}
+
+VExprSPtr like_expr(const VExprSPtr& left, const std::string& pattern) {
+    auto call = std::make_shared<TestFunctionExpr>("like", u8());
+    call->add_child(left); // operands: the matched expression, then the pattern string literal
+    call->add_child(literal(str(), Field::create_field<TYPE_STRING>(pattern)));
+    return call;
+}
+
+VExprSPtr struct_element_by_selector(const VExprSPtr& parent, DataTypePtr child_type,
+                                     const VExprSPtr& selector) {
+    auto call = std::make_shared<TestFunctionExpr>("struct_element", std::move(child_type));
+    call->add_child(parent); // the selector is attached as given, not wrapped in a literal
+    call->add_child(selector);
+    return call;
+}
+
+VExprSPtr int_gt(const VExprSPtr& left, int32_t value) {
+    auto pred = std::make_shared<TestFunctionExpr>("gt", u8(), TExprNodeType::BINARY_PRED,
+                                                   TExprOpcode::GT);
+    pred->add_child(left); // left operand first, then the INT literal it is compared against
+    pred->add_child(literal(i32(), Field::create_field<TYPE_INT>(value)));
+    return pred;
+}
+
+VExprSPtr binary_predicate(TExprOpcode::type opcode, const VExprSPtr& left,
+                           const VExprSPtr& right) {
+    auto pred = std::make_shared<TestFunctionExpr>("binary_predicate", u8(),
+                                                   TExprNodeType::BINARY_PRED, opcode);
+    pred->add_child(left); // operands are attached in left, right order
+    pred->add_child(right);
+    return pred;
+}
+
+VExprSPtr in_predicate(const VExprSPtr& probe, const DataTypePtr& literal_type,
+                       const std::vector<Field>& values) {
+    auto pred = std::make_shared<TestFunctionExpr>("in", u8(), TExprNodeType::IN_PRED);
+    pred->add_child(probe); // child 0 is the probe; every later child is one literal candidate
+    for (const auto& candidate : values) {
+        pred->add_child(literal(literal_type, candidate));
+    }
+    return pred;
+}
+
+VExprSPtr null_predicate(const VExprSPtr& child, bool is_null) {
+    const char* fn_name = is_null ? "is_null_pred" : "is_not_null_pred";
+    auto pred = std::make_shared<TestFunctionExpr>(fn_name, u8()); // result type is UInt8
+    pred->add_child(child);
+    return pred;
+}
+
+VExprSPtr cast_expr(const VExprSPtr& child, DataTypePtr target_type) {
+    auto cast = Cast::create_shared(std::move(target_type)); // Cast node with a single child
+    cast->add_child(child);
+    return cast;
+}
+
+VExprSPtr compound_predicate(TExprOpcode::type opcode, const VExprSPtr& left,
+                             const VExprSPtr& right) {
+    auto pred = std::make_shared<TestFunctionExpr>("compound", u8(), TExprNodeType::COMPOUND_PRED,
+                                                   opcode);
+    pred->add_child(left); // operands are attached in left, right order
+    pred->add_child(right);
+    return pred;
+}
+
+ColumnMapping mapped_struct_column(int32_t root_file_local_id, const std::string& child_name,
+                                   int32_t child_file_local_id, DataTypePtr child_type) {
+    ColumnDefinition leaf_def = name_col(child_name, child_type, child_file_local_id);
+    ColumnMapping mapping; // one-child struct "s"; file, original and table types are shared
+    mapping.table_type =
+            std::make_shared<DataTypeStruct>(DataTypes {child_type}, Strings {child_name});
+    mapping.file_type = mapping.table_type;
+    mapping.original_file_type = mapping.table_type;
+    mapping.global_index = GlobalIndex(0);
+    mapping.table_column_name = "s";
+    mapping.file_column_name = "s";
+    mapping.file_local_id = root_file_local_id;
+    mapping.original_file_children = {leaf_def};
+    mapping.projected_file_children = {leaf_def};
+    return mapping;
+}
+
+std::vector<NestedStructPath> collect_paths(const VExprSPtr& expr) {
+    std::vector<NestedStructPath> found; // filled in by collect_nested_struct_paths
+    collect_nested_struct_paths(expr, &found);
+    return found;
+}
+
+void expect_name_selector(const StructChildSelector& selector, const std::string& name) {
+    EXPECT_TRUE(selector.by_name); // the selector must be name-based
+    EXPECT_EQ(selector.name, name);
+}
+
+void expect_ordinal_selector(const StructChildSelector& selector, size_t ordinal) {
+    EXPECT_FALSE(selector.by_name); // the selector must not be name-based
+    EXPECT_EQ(selector.ordinal, ordinal);
+}
+
+void expect_path_root(const NestedStructPath& path, size_t global_index) { // Checks the root.
+    EXPECT_EQ(path.root_global_index, GlobalIndex(global_index));
+}
+
+class ColumnMapperCastTest : public testing::Test { // Fixture: strict cast is enabled in SetUp.
+protected:
+    void SetUp() override { state.set_enable_strict_cast(true); }
+
+    Status prepare_open_execute(VExprContext* context, Block* block, int* result_column_id) {
+        RETURN_IF_ERROR(context->prepare(&state, RowDescriptor())); // default row descriptor
+        RETURN_IF_ERROR(context->open(&state));
+        return context->execute(block, result_column_id);
+    }
+
+    MockRuntimeState state;
+};
+
+class Int64ChildGreaterThanExpr final : public VExpr { // Row-wise child(0) > value as UInt8.
+public:
+    explicit Int64ChildGreaterThanExpr(int64_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false), _value(value) {}
+
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        ColumnPtr child_column;
+        RETURN_IF_ERROR(
+                get_child(0)->execute_column(context, block, selector, count, child_column));
+        const auto& input = assert_cast<const ColumnInt64&>(*child_column); // must be Int64
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            result_data[row] = input.get_element(row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override { // copies this node only
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<Int64ChildGreaterThanExpr>(_value);
+        return Status::OK();
+    }
+
+private:
+    const int64_t _value;
+    const std::string _expr_name = "Int64ChildGreaterThanExpr";
+};
+
+class Int64BinaryPredicateExpr final : public VExpr { // GT/LT on get_int() of two children.
+public:
+    explicit Int64BinaryPredicateExpr(TExprOpcode::type opcode)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false) {
+        set_node_type(TExprNodeType::BINARY_PRED);
+        _opcode = opcode;
+    }
+
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        ColumnPtr left_column;
+        RETURN_IF_ERROR(get_child(0)->execute_column(context, block, selector, count, left_column));
+        ColumnPtr right_column;
+        RETURN_IF_ERROR(
+                get_child(1)->execute_column(context, block, selector, count, right_column));
+
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const auto left = left_column->get_int(row);
+            const auto right = right_column->get_int(row);
+            switch (_opcode) {
+            case TExprOpcode::GT:
+                result_data[row] = left > right;
+                break;
+            case TExprOpcode::LT:
+                result_data[row] = left < right;
+                break;
+            default: // only GT and LT are implemented
+                return Status::InternalError("Unsupported test opcode {}", _opcode);
+            }
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    const std::string& expr_name() const override { return _expr_name; }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override { // copies this node only
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<Int64BinaryPredicateExpr>(_opcode);
+        return Status::OK();
+    }
+
+private:
+    const std::string _expr_name = "Int64BinaryPredicateExpr";
+};
+
+VExprSPtr create_in_predicate() {
+    TInPredicate payload; // is_not_in is set to false below
+    payload.__set_is_not_in(false);
+    TExprNode in_node;
+    in_node.__set_in_predicate(payload);
+    in_node.__set_node_type(TExprNodeType::IN_PRED);
+    in_node.__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN));
+    in_node.__set_is_nullable(false);
+    in_node.__set_num_children(0);
+    return VInPredicate::create_shared(in_node);
+}
+
+// ----------------------------------------------------------------------
+// L0 schema projection helper tests.
+// These tests isolate LocalColumnIndex projection semantics before
+// TableColumnMapper starts mutating ColumnMapping state.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperSchemaProjectionTest, ProjectsStructByLocalIdAndKeepsFileOrder) {
+    auto a = field_id_col("a", 101, i32(), 0);
+    auto b = field_id_col("b", 102, str(), 1);
+    auto root = struct_col("s", 100, {a, b}, 7); // children are defined in the order a, b
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(7);
+    projection.children.push_back(LocalColumnIndex::local(1));
+    projection.children.push_back(LocalColumnIndex::local(0)); // requested order: b, then a
+
+    ColumnDefinition projected;
+    ASSERT_TRUE(project_column_definition(root, projection, &projected).ok());
+    ASSERT_EQ(projected.children.size(), 2);
+    EXPECT_EQ(projected.children[0].name, "a");
+    EXPECT_EQ(projected.children[1].name, "b"); // definition order wins over request order
+
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(projected.type).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 2);
+    EXPECT_EQ(projected_type->get_element_name(0), "a");
+    EXPECT_EQ(projected_type->get_element_name(1), "b");
+}
+
+TEST(ColumnMapperSchemaProjectionTest, ProjectsArrayElementStructLeaf) {
+    auto a = field_id_col("a", 1, i32(), 0);
+    auto b = field_id_col("b", 2, str(), 1);
+    auto element = struct_col("element", 10, {a, b}, 0);
+    auto array = array_col("items", 100, element, 5); // array<struct<a, b>>
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(5);
+    auto element_projection = LocalColumnIndex::partial_local(0);
+    element_projection.children.push_back(LocalColumnIndex::local(1)); // keep only b
+    projection.children.push_back(std::move(element_projection));
+
+    ColumnDefinition projected;
+    ASSERT_TRUE(project_column_definition(array, projection, &projected).ok());
+    ASSERT_EQ(projected.children.size(), 1);
+    ASSERT_EQ(projected.children[0].children.size(), 1);
+    EXPECT_EQ(projected.children[0].children[0].name, "b");
+
+    const auto* array_type =
+            assert_cast<const DataTypeArray*>(remove_nullable(projected.type).get());
+    const auto* element_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(array_type->get_nested_type()).get());
+    ASSERT_EQ(element_type->get_elements().size(), 1);
+    EXPECT_EQ(element_type->get_element_name(0), "b"); // the data type is pruned as well
+}
+
+TEST(ColumnMapperSchemaProjectionTest, ProjectsMapValueStructLeaf) {
+    auto key = field_id_col("key", 1, str(), 0);
+    auto value_a = field_id_col("a", 2, i32(), 0);
+    auto value_b = field_id_col("b", 3, str(), 1);
+    auto value_type =
+            std::make_shared<DataTypeStruct>(DataTypes {i32(), str()}, Strings {"a", "b"});
+    ColumnDefinition value = field_id_col("value", 4, value_type, 1);
+    value.children = {value_a, value_b};
+    auto map = map_col("m", 100, {key, value}, str(), value_type, 9); // map<str, struct<a, b>>
+
+    LocalColumnIndex projection = LocalColumnIndex::partial_local(9);
+    projection.children.push_back(LocalColumnIndex::local(0));
+    auto value_projection = LocalColumnIndex::partial_local(1);
+    value_projection.children.push_back(LocalColumnIndex::local(1)); // value keeps only b
+    projection.children.push_back(std::move(value_projection));
+
+    ColumnDefinition projected;
+    ASSERT_TRUE(project_column_definition(map, projection, &projected).ok());
+    ASSERT_EQ(projected.children.size(), 2);
+    EXPECT_EQ(projected.children[0].name, "key");
+    EXPECT_TRUE(projected.children[0].children.empty()); // the key has no nested children
+    EXPECT_EQ(projected.children[1].name, "value");
+    ASSERT_EQ(projected.children[1].children.size(), 1);
+    EXPECT_EQ(projected.children[1].children[0].name, "b");
+
+    const auto* map_type = assert_cast<const DataTypeMap*>(remove_nullable(projected.type).get());
+    const auto* projected_value =
+            assert_cast<const DataTypeStruct*>(remove_nullable(map_type->get_value_type()).get());
+    ASSERT_EQ(projected_value->get_elements().size(), 1);
+    EXPECT_EQ(projected_value->get_element_name(0), "b");
+}
+
+TEST(ColumnMapperSchemaProjectionTest, RejectsMapKeyOnlyProjection) {
+    auto key_def = field_id_col("key", 1, str(), 0);
+    auto value_def = field_id_col("value", 2, i32(), 1);
+    auto map_def = map_col("m", 100, {key_def, value_def}, str(), i32(), 9);
+
+    LocalColumnIndex key_only = LocalColumnIndex::partial_local(9);
+    key_only.children.push_back(LocalColumnIndex::local(0));  // key child only, no value
+
+    ColumnDefinition ignored;
+    const auto outcome = project_column_definition(map_def, key_only, &ignored);
+    ASSERT_FALSE(outcome.ok());
+    EXPECT_NE(outcome.to_string().find("contains no value child"), std::string::npos);
+}
+
+TEST(ColumnMapperSchemaProjectionTest, RejectsInvalidProjectionChildIdWithFieldName) {
+    auto struct_def = struct_col("s", 100, {field_id_col("a", 101, i32(), 0)}, 7);
+
+    LocalColumnIndex bad_index = LocalColumnIndex::partial_local(7);
+    bad_index.children.push_back(LocalColumnIndex::local(99));  // s only has child id 0
+
+    ColumnDefinition ignored;
+    const auto outcome = project_column_definition(struct_def, bad_index, &ignored);
+    ASSERT_FALSE(outcome.ok());
+    EXPECT_NE(outcome.to_string().find("Invalid projection child id 99 for field s"),
+              std::string::npos);
+}
+
+TEST(ColumnMapperSchemaProjectionTest, RejectsEmptyProjectionPathWithFieldName) {
+    auto struct_def = struct_col("s", 100, {field_id_col("a", 101, i32(), 0)}, 7);
+
+    LocalColumnIndex bad_index = LocalColumnIndex::partial_local(7);
+    bad_index.children.push_back(LocalColumnIndex::local(-1));  // negative child id
+
+    ColumnDefinition ignored;
+    const auto outcome = project_column_definition(struct_def, bad_index, &ignored);
+    ASSERT_FALSE(outcome.ok());
+    EXPECT_NE(outcome.to_string().find("Empty projection path for field s"), std::string::npos);
+}
+
+TEST(ColumnMapperSchemaProjectionTest, RejectsInvalidChildProjectionForPrimitiveField) {
+    auto scalar_def = field_id_col("i", 1, i32(), 7);
+    LocalColumnIndex bad_index = LocalColumnIndex::partial_local(7);
+    bad_index.children.push_back(LocalColumnIndex::local(0));  // i is built without children
+
+    ColumnDefinition ignored;
+    const auto outcome = project_column_definition(scalar_def, bad_index, &ignored);
+    ASSERT_FALSE(outcome.ok());
+    EXPECT_NE(outcome.to_string().find("Invalid projection child id 0 for field i"),
+              std::string::npos);
+}
+
+// ----------------------------------------------------------------------
+// L0 nested helper tests.
+// These tests cover child ordering, direct schema path resolution, and
+// predicate-filter merging without going through create_scan_request().
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperNestedHelperTest, PresentChildMappingsAreSortedByFileLocalId) {
+    ColumnMapping mapped_a;
+    mapped_a.table_column_name = "a";
+    mapped_a.file_local_id = 1;
+    ColumnMapping mapped_b;
+    mapped_b.table_column_name = "b";
+    mapped_b.file_local_id = 2;
+    ColumnMapping unmapped;  // file_local_id is never assigned for this one
+    unmapped.table_column_name = "missing";
+
+    const std::vector<ColumnMapping> input = {mapped_b, unmapped, mapped_a};
+    const auto ordered = present_child_mappings_in_file_order(input);
+    ASSERT_EQ(ordered.size(), 2);
+    EXPECT_EQ(ordered[0]->table_column_name, "a");
+    EXPECT_EQ(ordered[1]->table_column_name, "b");
+}
+
+TEST(ColumnMapperNestedHelperTest, BuildsProjectionByNameAndOrdinalSelectors) {
+    auto leaf_def = field_id_col("leaf", 3, i32(), 0);
+    auto nested_def = struct_col("nested", 2, {leaf_def}, 1);
+    auto first_def = field_id_col("first", 1, str(), 0);
+    const std::vector<ColumnDefinition> schema = {first_def, nested_def};
+
+    const std::vector<StructChildSelector> name_path = {
+            {.by_name = true, .name = "nested", .ordinal = 0},
+            {.by_name = true, .name = "leaf", .ordinal = 0},
+    };
+    LocalColumnIndex name_result;
+    ASSERT_TRUE(build_file_child_projection_from_schema(schema, name_path, &name_result).ok());
+    EXPECT_EQ(name_result.local_id(), 1);
+    ASSERT_EQ(name_result.children.size(), 1);
+    EXPECT_EQ(name_result.children[0].local_id(), 0);
+
+    const std::vector<StructChildSelector> ordinal_path = {  // 1-based positions
+            {.by_name = false, .name = "", .ordinal = 2},
+            {.by_name = false, .name = "", .ordinal = 1},
+    };
+    LocalColumnIndex ordinal_result;
+    ASSERT_TRUE(build_file_child_projection_from_schema(schema, ordinal_path, &ordinal_result)
+                        .ok());
+    EXPECT_EQ(ordinal_result.local_id(), 1);
+    ASSERT_EQ(ordinal_result.children.size(), 1);
+    EXPECT_EQ(ordinal_result.children[0].local_id(), 0);
+}
+
+TEST(ColumnMapperNestedHelperTest, MergesPredicateFiltersForSameNestedTarget) {
+    FileColumnPredicateFilter above_ten;
+    above_ten.target = FileNestedPredicateTarget(
+            LocalColumnId(7), std::make_unique<FileStructPredicateTarget>(2, "score"));
+    above_ten.file_column_id = LocalColumnId(7);
+    above_ten.file_child_id_path = {2};
+    above_ten.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            7, "score", i32(), Field::create_field<TYPE_INT>(10), false));
+
+    FileColumnPredicateFilter below_hundred;
+    below_hundred.target = FileNestedPredicateTarget(
+            LocalColumnId(7), std::make_unique<FileStructPredicateTarget>(2, "score"));
+    below_hundred.file_column_id = LocalColumnId(7);
+    below_hundred.file_child_id_path = {2};
+    below_hundred.predicates.push_back(create_comparison_predicate<PredicateType::LT>(
+            7, "score", i32(), Field::create_field<TYPE_INT>(100), false));
+
+    std::vector<FileColumnPredicateFilter> merged;
+    merge_column_predicate_filter(std::move(above_ten), &merged);
+    merge_column_predicate_filter(std::move(below_hundred), &merged);
+
+    ASSERT_EQ(merged.size(), 1);  // identical targets collapse into one filter
+    EXPECT_EQ(merged[0].effective_file_column_id(), LocalColumnId(7));
+    EXPECT_EQ(merged[0].effective_file_child_id_path(), std::vector<int32_t>({2}));
+    ASSERT_EQ(merged[0].predicates.size(), 2);
+    EXPECT_EQ(target_names(merged[0].target.struct_target.get()),
+              std::vector<std::string>({"score"}));
+}
+
+TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersFromOr) {
+    const auto leaf_type = i32();
+    const auto root_type = std::make_shared<DataTypeStruct>(DataTypes {leaf_type}, Strings {"a"});
+    const auto root_slot = table_slot(0, 0, root_type, "s");
+    const auto over_10 = int_gt(struct_element(root_slot, leaf_type, "a"), 10);
+    const auto over_20 = int_gt(struct_element(root_slot, leaf_type, "a"), 20);
+    const auto or_node = compound_predicate(TExprOpcode::COMPOUND_OR, over_10, over_20);
+
+    std::vector<FileColumnPredicateFilter> extracted;
+    collect_nested_column_predicate_filters(or_node, {mapped_struct_column(5, "a", 0, leaf_type)},
+                                            &extracted);
+
+    EXPECT_TRUE(extracted.empty());
+}
+
+TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersFromUnsupportedExpression) {
+    const auto leaf_type = i32();
+    const auto root_type = std::make_shared<DataTypeStruct>(DataTypes {leaf_type}, Strings {"a"});
+    auto sum_call = std::make_shared<TestFunctionExpr>("add", leaf_type);
+    sum_call->add_child(struct_element(table_slot(0, 0, root_type, "s"), leaf_type, "a"));
+    sum_call->add_child(literal(leaf_type, Field::create_field<TYPE_INT>(1)));
+
+    std::vector<FileColumnPredicateFilter> extracted;
+    collect_nested_column_predicate_filters(sum_call, {mapped_struct_column(5, "a", 0, leaf_type)},
+                                            &extracted);
+
+    EXPECT_TRUE(extracted.empty());
+}
+
+TEST(ColumnMapperNestedHelperTest, DoesNotExtractPredicateFiltersThroughUnsafeCast) {
+    const auto from_type = i64();
+    const auto into_type = i32();
+    const auto root_type = std::make_shared<DataTypeStruct>(DataTypes {from_type}, Strings {"a"});
+    const auto leaf_ref = struct_element(table_slot(0, 0, root_type, "s"), from_type, "a");
+    const auto cast_filter = int_gt(cast_expr(leaf_ref, into_type), 10);  // i64() -> i32()
+
+    std::vector<FileColumnPredicateFilter> matched;
+    collect_nested_column_predicate_filters(cast_filter,
+                                            {mapped_struct_column(5, "a", 0, from_type)}, &matched);
+
+    EXPECT_TRUE(matched.empty());
+}
+
+// ----------------------------------------------------------------------
+// collect_nested_struct_paths() helper tests.
+// These tests assert the entry helper for nested scan projection: it only discovers
+// table-side struct paths. Later localization decides whether to build pruning predicates.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperCollectNestedStructPathsTest, CollectsNameOrdinalAndBooleanSelectors) {
+    const auto leaf_type = i32();
+    const auto inner_type =
+            std::make_shared<DataTypeStruct>(DataTypes {leaf_type, leaf_type}, Strings {"x", "y"});
+    const auto root_type = std::make_shared<DataTypeStruct>(DataTypes {inner_type, leaf_type},
+                                                            Strings {"nested", "missing"});
+    const auto base = table_slot(0, 3, root_type, "s");
+
+    // Two chained ordinal selectors: ordinal 1 on the root slot, then ordinal 2 on that result.
+    auto found = collect_paths(struct_element_by_selector(
+            struct_element_by_selector(base, inner_type,
+                                       literal(i32(), Field::create_field<TYPE_INT>(1))),
+            leaf_type, literal(i32(), Field::create_field<TYPE_INT>(2))));
+    ASSERT_EQ(found.size(), 1);
+    expect_path_root(found[0], 3);
+    ASSERT_EQ(found[0].selectors.size(), 2);
+    expect_ordinal_selector(found[0].selectors[0], 1);
+    expect_ordinal_selector(found[0].selectors[1], 2);
+
+    const std::vector<VExprSPtr> accepted = {
+            literal(std::make_shared<DataTypeInt8>(),
+                    Field::create_field<TYPE_TINYINT>(static_cast<int8_t>(1))),
+            literal(std::make_shared<DataTypeInt16>(),
+                    Field::create_field<TYPE_SMALLINT>(static_cast<int16_t>(2))),
+            literal(i32(), Field::create_field<TYPE_INT>(3)),
+            literal(i64(), Field::create_field<TYPE_BIGINT>(4)),
+            literal(u8(), Field::create_field<TYPE_BOOLEAN>(true)),
+    };
+    for (size_t pos = 0; pos < accepted.size(); ++pos) {
+        found = collect_paths(struct_element_by_selector(base, leaf_type, accepted[pos]));
+        ASSERT_EQ(found.size(), 1);
+        ASSERT_EQ(found[0].selectors.size(), 1);
+        // Index 4 holds the boolean literal true, which is expected to map to ordinal 1.
+        const auto want = pos == 4 ? 1 : pos + 1;
+        expect_ordinal_selector(found[0].selectors[0], want);
+    }
+
+    found = collect_paths(struct_element(base, leaf_type, "missing"));
+    ASSERT_EQ(found.size(), 1);
+    ASSERT_EQ(found[0].selectors.size(), 1);
+    expect_name_selector(found[0].selectors[0], "missing");
+}
+
+TEST(ColumnMapperCollectNestedStructPathsTest, IgnoresInvalidSelectorsAndNonPathRoots) {
+    const auto leaf_type = i32();
+    const auto root_type = std::make_shared<DataTypeStruct>(DataTypes {leaf_type}, Strings {"a"});
+    const auto base = table_slot(0, 0, root_type, "s");
+
+    const std::vector<VExprSPtr> rejected = {
+            literal(i32(), Field::create_field<TYPE_INT>(0)),
+            literal(i32(), Field::create_field<TYPE_INT>(-1)),
+            literal(u8(), Field::create_field<TYPE_BOOLEAN>(false)),
+            literal(f32(), Field::create_field<TYPE_FLOAT>(1.0F)),
+            literal(f64(), Field::create_field<TYPE_DOUBLE>(1.0)),
+            table_slot(1, 1, i32(), "selector"),
+    };
+    for (const auto& bad : rejected) {
+        EXPECT_TRUE(collect_paths(struct_element_by_selector(base, leaf_type, bad)).empty());
+    }
+
+    auto one_arg = std::make_shared<TestFunctionExpr>("struct_element", leaf_type);
+    one_arg->add_child(base);  // no selector child is added
+    EXPECT_TRUE(collect_paths(one_arg).empty());
+
+    auto other_fn = std::make_shared<TestFunctionExpr>("other_function", leaf_type);
+    other_fn->add_child(base);
+    other_fn->add_child(literal(str(), Field::create_field<TYPE_STRING>("a")));
+    EXPECT_TRUE(collect_paths(other_fn).empty());
+
+    EXPECT_TRUE(collect_paths(struct_element(literal(str(), Field::create_field<TYPE_STRING>("x")),
+                                             leaf_type, "a"))
+                        .empty());
+    EXPECT_TRUE(collect_paths(nullptr).empty());
+}
+
+TEST(ColumnMapperCollectNestedStructPathsTest, RecursesThroughExpressionsAndKeepsCompletePath) {
+    const auto leaf_type = i32();
+    const auto inner_type = std::make_shared<DataTypeStruct>(DataTypes {leaf_type}, Strings {"b"});
+    const auto root_type =
+            std::make_shared<DataTypeStruct>(DataTypes {inner_type, leaf_type}, Strings {"a", "c"});
+    const auto base = table_slot(0, 2, root_type, "s");
+    const auto leaf_c = struct_element_by_selector(
+            base, leaf_type, literal(str(), Field::create_field<TYPE_STRING>("c")));
+    const auto node_a = struct_element_by_selector(
+            base, inner_type, literal(str(), Field::create_field<TYPE_STRING>("a")));
+    const auto leaf_ab = struct_element_by_selector(
+            node_a, leaf_type, literal(str(), Field::create_field<TYPE_STRING>("b")));
+
+    auto found = collect_paths(binary_predicate(
+            TExprOpcode::GT, leaf_ab, literal(leaf_type, Field::create_field<TYPE_INT>(1))));
+    ASSERT_EQ(found.size(), 1);
+    expect_path_root(found[0], 2);
+    ASSERT_EQ(found[0].selectors.size(), 2);
+    expect_name_selector(found[0].selectors[0], "a");
+    expect_name_selector(found[0].selectors[1], "b");
+
+    found = collect_paths(compound_predicate(
+            TExprOpcode::COMPOUND_OR,
+            binary_predicate(TExprOpcode::GT, leaf_ab,
+                             literal(leaf_type, Field::create_field<TYPE_INT>(1))),
+            binary_predicate(TExprOpcode::LT, leaf_c,
+                             literal(leaf_type, Field::create_field<TYPE_INT>(2)))));
+    ASSERT_EQ(found.size(), 2);
+    ASSERT_EQ(found[0].selectors.size(), 2);
+    ASSERT_EQ(found[1].selectors.size(), 1);
+    expect_name_selector(found[0].selectors[0], "a");
+    expect_name_selector(found[0].selectors[1], "b");
+    expect_name_selector(found[1].selectors[0], "c");
+
+    auto call = std::make_shared<TestFunctionExpr>("fn", leaf_type);
+    call->add_child(leaf_ab);
+    call->add_child(table_slot(3, 4, leaf_type, "other"));
+    found = collect_paths(call);
+    ASSERT_EQ(found.size(), 1);  // the plain slot child adds no path
+    ASSERT_EQ(found[0].selectors.size(), 2);
+
+    auto branch = std::make_shared<TestFunctionExpr>("if", leaf_type);
+    branch->add_child(literal(u8(), Field::create_field<TYPE_BOOLEAN>(true)));
+    branch->add_child(leaf_ab);
+    branch->add_child(leaf_c);
+    found = collect_paths(branch);
+    ASSERT_EQ(found.size(), 2);
+
+    found = collect_paths(compound_predicate(TExprOpcode::COMPOUND_AND, leaf_ab, leaf_ab));
+    ASSERT_EQ(found.size(), 2);  // a repeated path is reported once per occurrence
+
+    found = collect_paths(leaf_ab);
+    ASSERT_EQ(found.size(), 1);
+    ASSERT_EQ(found[0].selectors.size(), 2);
+}
+
+TEST(ColumnMapperCollectNestedStructPathsTest, CastBehaviorSeparatesProjectionAndPruningRules) {
+    const auto i32_type = i32();
+    const auto i64_type = i64();
+    const auto f32_type = f32();
+    const auto f64_type = f64();
+    const auto dec_8_2 = dec32(8, 2);
+    const auto dec_9_2 = dec32(9, 2);
+    const auto dec_9_3 = dec32(9, 3);
+
+    const auto root_type = std::make_shared<DataTypeStruct>(
+            DataTypes {i32_type, f32_type, dec_8_2}, Strings {"i", "f", "d"});
+    const auto base = table_slot(0, 0, root_type, "s");
+    const auto path_i = struct_element(base, i32_type, "i");
+    const auto path_f = struct_element(base, f32_type, "f");
+    const auto path_d = struct_element(base, dec_8_2, "d");
+
+    auto found = collect_paths(cast_expr(path_i, i64_type));  // i32() -> i64()
+    ASSERT_EQ(found.size(), 1);
+    expect_name_selector(found[0].selectors[0], "i");
+
+    found = collect_paths(cast_expr(path_f, f64_type));  // f32() -> f64()
+    ASSERT_EQ(found.size(), 1);
+    expect_name_selector(found[0].selectors[0], "f");
+
+    found = collect_paths(cast_expr(path_d, dec_9_2));  // wider decimal, same scale
+    ASSERT_EQ(found.size(), 1);
+    expect_name_selector(found[0].selectors[0], "d");
+
+    found = collect_paths(
+            cast_expr(struct_element(base, make_nullable(i32_type), "i"), make_nullable(i32_type)));
+    ASSERT_EQ(found.size(), 1);
+    expect_name_selector(found[0].selectors[0], "i");
+
+    // A cast that is not safe for pruning is still traversed by collect_nested_struct_paths(),
+    // so the struct child used by a row-level filter remains part of the scan projection.
+    found = collect_paths(cast_expr(struct_element(base, i64_type, "i"), i32_type));
+    ASSERT_EQ(found.size(), 1);
+    expect_name_selector(found[0].selectors[0], "i");
+
+    found = collect_paths(cast_expr(path_d, dec_9_3));  // decimal with a changed scale
+    ASSERT_EQ(found.size(), 1);
+    expect_name_selector(found[0].selectors[0], "d");
+
+    EXPECT_TRUE(collect_paths(cast_expr(table_slot(1, 1, i32_type, "plain"), i64_type)).empty());
+}
+
+TEST(ColumnMapperCollectNestedStructPathsTest, ProjectionMergeKeepsFilterOnlyPathAndDeduplicates) {
+    const auto i32_type = i32();
+    const auto str_type = str();
+    auto tbl_a = name_col("a", i32_type);
+    auto tbl_b = name_col("b", i32_type);
+    auto tbl_output = struct_name_col("s", {tbl_a});
+    auto tbl_full = struct_name_col("s", {tbl_a, tbl_b});
+
+    auto src_a = name_col("a", i32_type, 0);
+    auto src_b = name_col("b", i32_type, 1);
+    auto src_struct = struct_name_col("s", {src_a, src_b, name_col("c", str_type, 2)}, 5);
+
+    TableColumnMapper column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(column_mapper.create_mapping({tbl_output}, {}, {src_struct}).ok());
+
+    // Only s.a is projected for output; both halves of the filter read s.b instead.
+    const auto b_ref = struct_element(table_slot(0, 0, tbl_full.type, "s"), i32_type, "b");
+    auto range_expr = compound_predicate(
+            TExprOpcode::COMPOUND_AND,
+            binary_predicate(TExprOpcode::GT, b_ref,
+                             literal(i32_type, Field::create_field<TYPE_INT>(1))),
+            binary_predicate(TExprOpcode::LT, b_ref,
+                             literal(i32_type, Field::create_field<TYPE_INT>(10))));
+    TableFilter clause {.conjunct = VExprContext::create_shared(range_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest scan;
+    ASSERT_TRUE(column_mapper.create_scan_request({clause}, {}, {tbl_output}, &scan).ok());
+
+    EXPECT_TRUE(scan.non_predicate_columns.empty());
+    ASSERT_EQ(scan.predicate_columns.size(), 1);
+    EXPECT_EQ(scan.predicate_columns[0].column_id(), LocalColumnId(5));
+    ASSERT_FALSE(scan.predicate_columns[0].project_all_children);
+    EXPECT_EQ(projection_ids(scan.predicate_columns[0].children), std::vector<int32_t>({0, 1}));
+    ASSERT_EQ(scan.column_predicate_filters.size(), 1);
+    EXPECT_EQ(scan.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({1}));
+    ASSERT_EQ(scan.column_predicate_filters[0].predicates.size(), 2);
+}
+
+// Scenario: a row-oriented reader (for example CSV/Text) has no way to lazily read predicate
+// columns on their own. When a complex root is both projected and used by a filter, the
+// materialized mapper emits a single non-predicate scan entry covering the whole top-level struct.
+TEST(ColumnMapperScanRequestTest, MaterializedMapperUsesSingleScanColumnList) {
+    const auto i32_type = i32();
+    const auto str_type = str();
+    auto tbl_a = name_col("a", i32_type, 0);
+    auto tbl_b = name_col("b", i32_type, 1);
+    auto tbl_full = struct_name_col("s", {tbl_a, tbl_b});
+    auto tbl_output = struct_name_col("s", {tbl_a});
+
+    auto src_a = name_col("a", i32_type, 0);
+    auto src_b = name_col("b", i32_type, 1);
+    auto src_struct = struct_name_col("s", {src_a, src_b, name_col("c", str_type, 2)}, 5);
+
+    MaterializedColumnMapper column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(column_mapper.create_mapping({tbl_output}, {}, {src_struct}).ok());
+
+    // The filter reads s.b, a child that the projected output struct does not include.
+    const auto b_ref = struct_element(table_slot(0, 0, tbl_full.type, "s"), i32_type, "b");
+    auto predicate = binary_predicate(TExprOpcode::GT, b_ref,
+                                      literal(i32_type, Field::create_field<TYPE_INT>(1)));
+    TableFilter clause {.conjunct = VExprContext::create_shared(predicate),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest scan;
+    ASSERT_TRUE(column_mapper.create_scan_request({clause}, {}, {tbl_output}, &scan).ok());
+
+    EXPECT_TRUE(scan.predicate_columns.empty());
+    ASSERT_EQ(scan.non_predicate_columns.size(), 1);
+    EXPECT_EQ(scan.non_predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_TRUE(scan.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(scan.non_predicate_columns[0].children.empty());
+    EXPECT_TRUE(scan.column_predicate_filters.empty());
+}
+
+// Scenario: FileReader implementations are expected to expose semantic children for complex
+// file columns. A column that carries a complex DataType but an empty ColumnDefinition::children
+// must make the mapper return a diagnostic error rather than abort in ARRAY/MAP/STRUCT lookup.
+TEST(ColumnMapperScanRequestTest, MalformedComplexFileSchemaReturnsError) {
+    const auto i32_type = i32();
+    const auto str_type = str();
+    auto tbl_a = name_col("a", i32_type, 0);
+    auto tbl_b = name_col("b", str_type, 1);
+    auto tbl_struct = struct_name_col("s", {tbl_a, tbl_b});
+    auto src_struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {i32_type, str_type}, Strings {"a", "b"});
+    auto childless_struct = name_col("s", src_struct_type, 5);  // no children are attached
+
+    MaterializedColumnMapper column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    const auto outcome = column_mapper.create_mapping({tbl_struct}, {}, {childless_struct});
+
+    ASSERT_FALSE(outcome.ok());
+    EXPECT_NE(outcome.to_string().find("Malformed complex file schema"), std::string::npos)
+            << outcome;
+}
+
+// Scenario: the projected table schema already contains the child that the filter references.
+// The materialized mapper can then still rewrite the table-level struct child predicate into a
+// file-local conjunct; the scan stays one full-root column and only the expression is localized.
+TEST(ColumnMapperScanRequestTest, MaterializedMapperLocalizesMappedStructChildConjunct) {
+    const auto i32_type = i32();
+    const auto str_type = str();
+    auto tbl_a = name_col("a", i32_type, 0);
+    auto tbl_b = name_col("b", i32_type, 1);
+    auto tbl_struct = struct_name_col("s", {tbl_a, tbl_b});
+
+    auto src_a = name_col("a", i32_type, 0);
+    auto src_b = name_col("b", i32_type, 1);
+    auto src_struct = struct_name_col("s", {src_a, src_b, name_col("c", str_type, 2)}, 5);
+
+    MaterializedColumnMapper column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(column_mapper.create_mapping({tbl_struct}, {}, {src_struct}).ok());
+
+    const auto b_ref = struct_element(table_slot(0, 0, tbl_struct.type, "s"), i32_type, "b");
+    auto predicate = binary_predicate(TExprOpcode::GT, b_ref,
+                                      literal(i32_type, Field::create_field<TYPE_INT>(1)));
+    TableFilter clause {.conjunct = VExprContext::create_shared(predicate),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest scan;
+    ASSERT_TRUE(column_mapper.create_scan_request({clause}, {}, {tbl_struct}, &scan).ok());
+
+    EXPECT_TRUE(scan.predicate_columns.empty());
+    ASSERT_EQ(scan.non_predicate_columns.size(), 1);
+    EXPECT_EQ(scan.non_predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_TRUE(scan.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(scan.non_predicate_columns[0].children.empty());
+    EXPECT_TRUE(scan.column_predicate_filters.empty());
+    ASSERT_EQ(scan.conjuncts.size(), 1);  // the filter survives as one conjunct
+}
+
+// Scenario: an output-only partial complex projection such as `SELECT s.a` still has to scan the
+// whole top-level struct with a materialized reader, since a delimited text format cannot
+// physically read a single nested child out of one text field.
+TEST(ColumnMapperScanRequestTest, MaterializedMapperScansFullComplexRootForOutputOnlyProjection) {
+    const auto i32_type = i32();
+    const auto str_type = str();
+    auto tbl_a = name_col("a", i32_type, 0);
+    auto tbl_output = struct_name_col("s", {tbl_a});
+
+    auto src_a = name_col("a", i32_type, 0);
+    auto src_b = name_col("b", i32_type, 1);
+    auto src_struct = struct_name_col("s", {src_a, src_b, name_col("c", str_type, 2)}, 5);
+
+    MaterializedColumnMapper column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(column_mapper.create_mapping({tbl_output}, {}, {src_struct}).ok());
+
+    FileScanRequest scan;  // built with no filters at all
+    ASSERT_TRUE(column_mapper.create_scan_request({}, {}, {tbl_output}, &scan).ok());
+
+    ASSERT_EQ(scan.non_predicate_columns.size(), 1);
+    EXPECT_EQ(scan.non_predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_TRUE(scan.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(scan.non_predicate_columns[0].children.empty());
+    EXPECT_TRUE(scan.predicate_columns.empty());
+    EXPECT_TRUE(scan.column_predicate_filters.empty());
+}
+
+// Scenario: nested projections on arrays and maps likewise scan the whole top-level complex root
+// for materialized readers, so that row-oriented formats never receive Parquet-style partial
+// projections for `array<struct>` elements or map value structs.
+TEST(ColumnMapperScanRequestTest, MaterializedMapperScansFullArrayAndMapRoots) {
+    const auto map_key_type = str();
+    const auto i32_type = i32();
+    const auto str_type = str();
+
+    auto tbl_item_b = name_col("b", str_type);
+    auto tbl_item = struct_name_col("element", {tbl_item_b});
+    auto tbl_items = array_col("items", -1, tbl_item);
+    tbl_items.identifier = Field::create_field<TYPE_STRING>("items");
+    set_name_identifiers(&tbl_items, -1);
+
+    auto src_item_a = name_col("a", i32_type, 0);
+    auto src_item_b = name_col("b", str_type, 1);
+    auto src_item = struct_name_col("element", {src_item_a, src_item_b}, 0);
+    auto src_items = array_col("items", -1, src_item, 4);
+    src_items.identifier = Field::create_field<TYPE_STRING>("items");
+    set_name_identifiers(&src_items, 4);
+
+    auto tbl_val_b = name_col("b", str_type);
+    auto tbl_val = struct_name_col("value", {tbl_val_b});
+    auto tbl_map = map_col("m", -1, {tbl_val}, map_key_type, tbl_val.type);
+    tbl_map.identifier = Field::create_field<TYPE_STRING>("m");
+    set_name_identifiers(&tbl_map, -1);
+
+    auto src_key = name_col("key", map_key_type, 0);
+    auto src_val_a = name_col("a", i32_type, 0);
+    auto src_val_b = name_col("b", str_type, 1);
+    auto src_val = struct_name_col("value", {src_val_a, src_val_b}, 1);
+    auto src_map = map_col("m", -1, {src_key, src_val}, map_key_type, src_val.type, 6);
+    src_map.identifier = Field::create_field<TYPE_STRING>("m");
+    set_name_identifiers(&src_map, 6);
+
+    MaterializedColumnMapper column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(column_mapper.create_mapping({tbl_items, tbl_map}, {}, {src_items, src_map}).ok());
+
+    FileScanRequest scan;
+    ASSERT_TRUE(column_mapper.create_scan_request({}, {}, {tbl_items, tbl_map}, &scan).ok());
+
+    ASSERT_EQ(scan.non_predicate_columns.size(), 2);
+    EXPECT_EQ(scan.non_predicate_columns[0].column_id(), LocalColumnId(4));  // array root
+    EXPECT_TRUE(scan.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(scan.non_predicate_columns[0].children.empty());
+    EXPECT_EQ(scan.non_predicate_columns[1].column_id(), LocalColumnId(6));  // map root
+    EXPECT_TRUE(scan.non_predicate_columns[1].project_all_children);
+    EXPECT_TRUE(scan.non_predicate_columns[1].children.empty());
+    EXPECT_TRUE(scan.predicate_columns.empty());
+    EXPECT_TRUE(scan.column_predicate_filters.empty());
+}
+
+// ----------------------------------------------------------------------
+// L1 create_mapping root matching tests.
+// These cases cover the three supported root matching modes and the
+// missing/default behavior that each mode feeds into later scan requests.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperCreateMappingTest, ByNameMatchesCaseIdentifierAndAliases) {
+    const auto i32_type = i32();
+    const std::vector<ColumnDefinition> tbl_cols = {
+            name_col("ID", i32_type),
+            name_id_col("renamed", "legacy_name", i32_type),
+            [] {
+                auto aliased = name_col("current_alias", i32());
+                aliased.name_mapping = {"old_alias"};  // alias declared on the table side
+                return aliased;
+            }(),
+            name_col("file_alias", i32_type),
+    };
+    std::vector<ColumnDefinition> src_cols = {
+            name_col("id", i32_type, 0),
+            name_col("legacy_name", i32_type, 1),
+            name_col("old_alias", i32_type, 2),
+            [] {
+                auto aliased = name_col("physical_name", i32(), 3);
+                aliased.name_mapping = {"file_alias"};  // alias declared on the file side
+                return aliased;
+            }(),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(tbl_cols, {}, src_cols).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 4);
+    expect_mapping(mapper.mappings()[0], 0, "ID", 0, "id", i32_type, i32_type);
+    expect_mapping(mapper.mappings()[1], 1, "renamed", 1, "legacy_name", i32_type, i32_type);
+    expect_mapping(mapper.mappings()[2], 2, "current_alias", 2, "old_alias", i32_type, i32_type);
+    expect_mapping(mapper.mappings()[3], 3, "file_alias", 3, "physical_name", i32_type, i32_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByNameUsesFirstMatchingFileFieldWhenAmbiguous) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            name_col("id", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("ID", int_type, 0),  // listed first, so it is the expected match
+            name_col("id", int_type, 1),  // exact-case name, but listed second
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "id", 0, "ID", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, TimestampTzScaleMismatchDoesNotAddFinalizeCast) {
+    // Scenario: HDFS TVF may expose a table slot as TIMESTAMPTZ(0), while a Parquet logical UTC
+    // timestamp file schema is materialized as TIMESTAMPTZ(6). Finalization must not add a SQL
+    // cast from scale 6 to scale 0, because that cast rounds fractional seconds:
+    //   2025-06-01 12:34:56.789+08:00 -> 2025-06-01 12:34:57+08:00
+    // Reader finalization should pass the column through; the output slot type controls display
+    // scale and hides the fractional part without changing the stored instant.
+    const auto table_type = timestamptz(0);  // table slot: scale 0
+    const auto file_type = timestamptz(6);  // file schema: scale 6
+    const std::vector<ColumnDefinition> table_schema = {name_col("ts_tz", table_type)};
+    const std::vector<ColumnDefinition> file_schema = {name_col("ts_tz", file_type, 0)};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "ts_tz", 0, "ts_tz", file_type, table_type);
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);  // presumably: no cast is attached
+    EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::COPY_DIRECTLY);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByNameUsesNameMappingForRenamedColumn) {
+    const auto int_type = i32();
+    auto table_column = name_col("current_id", int_type);
+    table_column.name_mapping = {"legacy_id"};  // the file column is named "legacy_id"
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("legacy_id", int_type, 0),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_column}, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "current_id", 0, "legacy_id", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByNameUsesNameMappingForNestedSchemaEvolution) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_country = name_col("country", string_type);
+    table_country.name_mapping = {"old_country"};  // file struct child is "old_country"
+    auto table_city = name_col("city", string_type);
+    auto table_struct = struct_name_col("struct_column", {table_country, table_city});
+    set_name_identifiers(&table_struct, -1);
+
+    auto table_item = name_col("item", string_type);
+    table_item.name_mapping = {"product"};  // file element child is "product"
+    auto table_quantity = name_col("quantity", int_type);  // absent from the file element
+    auto table_element = struct_name_col("element", {table_item, table_quantity});
+    auto table_array = array_col("array_column", -1, table_element);
+    set_name_identifiers(&table_array, -1);
+
+    auto table_key = name_col("key", string_type);
+    auto table_full_name = name_col("full_name", string_type);
+    table_full_name.name_mapping = {"name"};  // file value child is "name"
+    auto table_age = name_col("age", int_type);
+    auto table_value = struct_name_col("value", {table_full_name, table_age});
+    auto table_map =
+            map_col("new_map_column", -1, {table_key, table_value}, string_type, table_value.type);
+    table_map.name_mapping = {"map_column"};  // file root is named "map_column"
+    set_name_identifiers(&table_map, -1);
+
+    auto file_old_country = name_col("old_country", string_type, 0);
+    auto file_city = name_col("city", string_type, 1);
+    auto file_struct = struct_name_col("struct_column", {file_old_country, file_city}, 3);
+    set_name_identifiers(&file_struct, 3);
+
+    auto file_product = name_col("product", string_type, 0);
+    auto file_element = struct_name_col("list", {file_product}, 0);  // element is named "list"
+    auto file_array = array_col("array_column", -1, file_element, 4);
+    set_name_identifiers(&file_array, 4);
+
+    auto file_key = name_col("key", string_type, 0);
+    auto file_name = name_col("name", string_type, 0);
+    auto file_age = name_col("age", int_type, 1);
+    auto file_value = struct_name_col("value", {file_name, file_age}, 1);
+    auto file_map =
+            map_col("map_column", -1, {file_key, file_value}, string_type, file_value.type, 5);
+    set_name_identifiers(&file_map, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct, table_array, table_map}, {},
+                                      {file_struct, file_array, file_map})
+                        .ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);  // struct, array and map roots
+    const auto& struct_mapping = mapper.mappings()[0];
+    expect_mapping(struct_mapping, 0, "struct_column", 3, "struct_column", file_struct.type,
+                   table_struct.type);
+    ASSERT_EQ(struct_mapping.child_mappings.size(), 2);  // country and city
+    EXPECT_EQ(struct_mapping.child_mappings[0].file_column_name, "old_country");
+    EXPECT_EQ(*struct_mapping.child_mappings[0].file_local_id, 0);
+    EXPECT_EQ(struct_mapping.child_mappings[1].file_column_name, "city");
+    EXPECT_EQ(*struct_mapping.child_mappings[1].file_local_id, 1);
+
+    const auto& array_mapping = mapper.mappings()[1];
+    expect_mapping(array_mapping, 1, "array_column", 4, "array_column", file_array.type,
+                   table_array.type);
+    ASSERT_EQ(array_mapping.child_mappings.size(), 1);  // the single element child
+    const auto& element_mapping = array_mapping.child_mappings[0];
+    EXPECT_EQ(element_mapping.file_column_name, "list");
+    EXPECT_EQ(*element_mapping.file_local_id, 0);
+    ASSERT_EQ(element_mapping.child_mappings.size(), 2);  // item and quantity
+    EXPECT_EQ(element_mapping.child_mappings[0].file_column_name, "product");
+    EXPECT_EQ(*element_mapping.child_mappings[0].file_local_id, 0);
+    expect_missing(element_mapping.child_mappings[1]);  // "quantity" has no file child
+
+    const auto& map_mapping = mapper.mappings()[2];
+    expect_mapping(map_mapping, 2, "new_map_column", 5, "map_column", file_map.type,
+                   table_map.type);
+    ASSERT_EQ(map_mapping.child_mappings.size(), 2);  // key and value
+    EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key");
+    EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0);
+    const auto& value_mapping = map_mapping.child_mappings[1];
+    EXPECT_EQ(value_mapping.file_column_name, "value");
+    EXPECT_EQ(*value_mapping.file_local_id, 1);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 2);  // full_name and age
+    EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "name");
+    EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 0);
+    EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "age");
+    EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 1);
+}
+
+// Scenario: SELECT * can carry only the full complex DataType without expanded nested
+// ColumnDefinitions. When an old file has map value STRUCT<age, name> and the table type is
+// STRUCT<age, full_name, gender>, the mapper must still build child mappings instead of letting
+// TableReader cast between incompatible struct shapes.
+TEST(ColumnMapperCreateMappingTest, SynthesizesMissingMapValueStructChildrenFromType) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const auto table_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"});
+    const auto file_value_type = std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type},
+                                                                  Strings {"age", "name"});
+
+    auto table_map = name_col("new_map_column",
+                              std::make_shared<DataTypeMap>(string_type, table_value_type));
+    table_map.name_mapping = {"map_column"};  // file root is named "map_column"
+    set_name_identifiers(&table_map, -1);
+
+    auto file_age = name_col("age", int_type, 0);
+    auto file_name = name_col("name", string_type, 1);
+    auto file_value = struct_name_col("value", {file_age, file_name}, 1);
+    auto file_key = name_col("key", string_type, 0);
+    auto file_map =
+            map_col("map_column", -1, {file_key, file_value}, string_type, file_value_type, 5);
+    set_name_identifiers(&file_map, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto& map_mapping = mapper.mappings()[0];
+    ASSERT_EQ(map_mapping.child_mappings.size(), 2);  // key and value
+    EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key");
+    EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key");
+    EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0);
+
+    const auto& value_mapping = map_mapping.child_mappings[1];
+    EXPECT_EQ(value_mapping.table_column_name, "value");
+    EXPECT_EQ(value_mapping.file_column_name, "value");
+    EXPECT_EQ(*value_mapping.file_local_id, 1);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 3);  // age, full_name, gender
+    EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age");
+    EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age");
+    EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 0);
+    EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name");
+    EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name");
+    EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 1);
+    EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender");
+    expect_missing(value_mapping.child_mappings[2]);  // file struct has no "gender"
+    EXPECT_FALSE(value_mapping.is_trivial);
+}
+
+// Scenario: MAP_KEYS(new_map_column) may build a key-only nested projection, while SELECT * still
+// needs the whole map root. The mapper must add a synthetic value child and recursively map the old
+// value struct instead of treating Struct(name, age) as a leaf to CAST into the table value struct.
+TEST(ColumnMapperCreateMappingTest, KeyOnlyMapProjectionStillMapsEvolvedValueStruct) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const auto table_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"});
+    const auto file_value_type = std::make_shared<DataTypeStruct>(DataTypes {string_type, int_type},
+                                                                  Strings {"name", "age"});
+
+    auto table_key = name_col("key", string_type);  // the table map lists only this child
+    auto table_map = map_col("new_map_column", -1, {table_key}, string_type, table_value_type);
+    table_map.name_mapping = {"map_column"};  // file root is named "map_column"
+    set_name_identifiers(&table_map, -1);
+
+    auto file_key = name_col("key", string_type, 0);
+    auto file_name = name_col("name", string_type, 0);  // file order is name, then age
+    auto file_age = name_col("age", int_type, 1);
+    auto file_value = struct_name_col("value", {file_name, file_age}, 1);
+    auto file_map =
+            map_col("map_column", -1, {file_key, file_value}, string_type, file_value_type, 5);
+    set_name_identifiers(&file_map, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto& map_mapping = mapper.mappings()[0];
+    ASSERT_EQ(map_mapping.child_mappings.size(), 2);  // key plus a value child
+    EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key");
+    EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key");
+    EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0);
+
+    const auto& value_mapping = map_mapping.child_mappings[1];
+    EXPECT_EQ(value_mapping.table_column_name, "value");
+    EXPECT_EQ(value_mapping.file_column_name, "value");
+    EXPECT_EQ(*value_mapping.file_local_id, 1);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 3);  // age, full_name, gender
+    EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age");
+    EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age");
+    EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 1);
+    EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name");
+    EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name");
+    EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 0);
+    EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender");
+    expect_missing(value_mapping.child_mappings[2]);  // file struct has no "gender"
+    EXPECT_FALSE(value_mapping.is_trivial);
+}
+
+// Scenario: Iceberg uses field-id mapping, but a key-only map projection may force the mapper to
+// synthesize the missing value struct from DataType names, which do not carry field ids. The mapper
+// must name-match synthesized children before ordinal fallback, otherwise `age` would read old
+// file child `name` and the later materialization would build the value struct incorrectly.
+TEST(ColumnMapperCreateMappingTest,
+     KeyOnlyMapProjectionSynthesizedValueStructNameMatchesBeforeOrdinalFallback) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const auto table_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, string_type, string_type}, Strings {"age", "full_name", "gender"});
+    const auto file_value_type = std::make_shared<DataTypeStruct>(DataTypes {string_type, int_type},
+                                                                  Strings {"name", "age"});
+
+    auto table_key = field_id_col("key", 10, string_type, 0);  // the table map's only child
+    auto table_map = map_col("new_map_column", 2, {table_key}, string_type, table_value_type);
+
+    auto file_key = field_id_col("key", 10, string_type, 0);
+    auto file_name = field_id_col("name", 7, string_type, 0);  // listed before "age"
+    auto file_age = field_id_col("age", 8, int_type, 1);
+    auto file_value = struct_col("value", 11, {file_name, file_age}, 1);
+    auto file_map =
+            map_col("new_map_column", 2, {file_key, file_value}, string_type, file_value_type, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto& map_mapping = mapper.mappings()[0];
+    ASSERT_EQ(map_mapping.child_mappings.size(), 2);  // key plus a value child
+    EXPECT_EQ(map_mapping.child_mappings[0].table_column_name, "key");
+    EXPECT_EQ(map_mapping.child_mappings[0].file_column_name, "key");
+    EXPECT_EQ(*map_mapping.child_mappings[0].file_local_id, 0);
+
+    const auto& value_mapping = map_mapping.child_mappings[1];
+    EXPECT_EQ(value_mapping.table_column_name, "value");
+    EXPECT_EQ(value_mapping.file_column_name, "value");
+    EXPECT_EQ(*value_mapping.file_local_id, 1);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 3);  // age, full_name, gender
+    EXPECT_EQ(value_mapping.child_mappings[0].table_column_name, "age");
+    EXPECT_EQ(value_mapping.child_mappings[0].file_column_name, "age");
+    EXPECT_EQ(*value_mapping.child_mappings[0].file_local_id, 1);
+    EXPECT_EQ(value_mapping.child_mappings[1].table_column_name, "full_name");
+    EXPECT_EQ(value_mapping.child_mappings[1].file_column_name, "name");
+    EXPECT_EQ(*value_mapping.child_mappings[1].file_local_id, 0);
+    EXPECT_EQ(value_mapping.child_mappings[2].table_column_name, "gender");
+    expect_missing(value_mapping.child_mappings[2]);  // file struct has no "gender"
+    EXPECT_FALSE(value_mapping.is_trivial);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByFieldIdDoesNotFallbackToNameAndUsesFirstDuplicate) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            field_id_col("renamed", 10, int_type),  // id 10 appears twice in the file
+            name_col("same_name", int_type),  // built with name_col; expected to stay unmatched
+            field_id_col("negative", -7, int_type),  // negative ids are matched too
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("first", 10, int_type, 0),  // first id-10 field: expected match
+            field_id_col("second", 10, int_type, 1),  // duplicate id 10, listed later
+            field_id_col("same_name", 99, int_type, 2),
+            field_id_col("negative_file", -7, int_type, 3),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "renamed", 0, "first", int_type, int_type);
+    expect_missing(mapper.mappings()[1]);  // "same_name" is not matched by name
+    expect_mapping(mapper.mappings()[2], 2, "negative", 3, "negative_file", int_type, int_type);
+}
+
+// Scenario: Iceberg TopN lazy materialization uses BY_FIELD_ID for schema evolution and also asks
+// the file reader to synthesize GLOBAL_ROWID. GLOBAL_ROWID is matched by ColumnType before the
+// field-id matcher, so keeping BY_FIELD_ID does not make the mapper look for a numeric field id for
+// that virtual column.
+TEST(ColumnMapperCreateMappingTest, ByFieldIdMapsGlobalRowIdByVirtualColumnType) {
+    const auto int_type = i32();
+    auto table_rowid = global_rowid_column_definition();
+    table_rowid.name = BeConsts::GLOBAL_ROWID_COL + "equality_delete_par_1";
+    table_rowid.identifier = Field::create_field<TYPE_STRING>(table_rowid.name);
+
+    const std::vector<ColumnDefinition> table_schema = {
+            field_id_col("new_new_id", 1, int_type),  // shares field id 1 with file "id"
+            table_rowid,  // name was given an extra suffix above
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("id", 1, int_type, 0),
+            global_rowid_column_definition(),  // file side uses the helper's definition as-is
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    expect_mapping(mapper.mappings()[0], 0, "new_new_id", 0, "id", int_type, int_type);
+    expect_mapping(mapper.mappings()[1], 1, table_rowid.name, GLOBAL_ROWID_COLUMN_ID,
+                   BeConsts::GLOBAL_ROWID_COL, str(), str());
+}
+
+TEST(ColumnMapperCreateMappingTest, ByFieldIdTreatsSameNameDifferentFieldIdAsMissing) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            field_id_col("same_name", 10, int_type),  // table field id 10
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("same_name", 20, int_type, 0),  // same name, but field id 20
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_missing(mapper.mappings()[0]);  // ids differ, so no match despite the name
+}
+
+TEST(ColumnMapperCreateMappingTest, NestedFieldIdTreatsSameNameDifferentFieldIdAsMissing) {
+    const auto int_type = i32();
+    auto table_child = field_id_col("child", 10, int_type);  // table child id 10
+    auto table_root = struct_col("root", 1, {table_child});
+
+    auto file_child = field_id_col("child", 20, int_type, 0);  // same name, id 20
+    auto file_root = struct_col("root", 1, {file_child}, 0);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    const auto status = mapper.create_mapping({table_root}, {}, {file_root});
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_mapping(mapper.mappings()[0], 0, "root", 0, "root", file_root.type, table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 1);
+    expect_missing(mapper.mappings()[0].child_mappings[0]);  // root matches, child does not
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexMapsTopLevelColumnsByPositionIgnoringFileNames) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("user_id", 0, int_type),
+            position_col("user_name", 1, string_type),
+            position_col("age", 2, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),  // paired with "user_id" by position
+            field_id_col("_col1", 101, string_type, 1),  // paired with "user_name" by position
+            field_id_col("_col2", 102, int_type, 2),  // paired with "age" by position
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "user_id", 0, "_col0", int_type, int_type);
+    expect_mapping(mapper.mappings()[1], 1, "user_name", 1, "_col1", string_type, string_type);
+    expect_mapping(mapper.mappings()[2], 2, "age", 2, "_col2", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexSupportsSparseProjection) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("age", 2, int_type),  // selects file position 2
+            position_col("score", 4, int_type),  // selects file position 4
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0), field_id_col("_col1", 101, int_type, 1),
+            field_id_col("_col2", 102, int_type, 2), field_id_col("_col3", 103, int_type, 3),
+            field_id_col("_col4", 104, int_type, 4),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 2);  // only the two requested columns
+    expect_mapping(mapper.mappings()[0], 0, "age", 2, "_col2", int_type, int_type);
+    expect_mapping(mapper.mappings()[1], 1, "score", 4, "_col4", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest,
+     ByIndexMatchesNestedStructChildrenByNameEvenWhenChildrenHaveFieldIds) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    // Hive positional mapping only applies to top-level columns. FE/history schema metadata can
+    // still put field-id style integer identifiers on nested struct children. Those nested
+    // identifiers must not be interpreted as file positions.
+    auto table_root = struct_col("profile", 1,
+                                 {
+                                         field_id_col("id", 100, int_type),
+                                         field_id_col("name", 101, string_type),
+                                 });
+    // Reverse the file child order so a wrong positional match either misses the child or reads
+    // the wrong physical child. The expected mapping below proves the children are matched by name.
+    auto file_root = struct_name_col("_col1",
+                                     {
+                                             name_col("name", string_type, 0),
+                                             name_col("id", int_type, 1),
+                                     },
+                                     1);
+    const std::vector<ColumnDefinition> table_schema = {table_root};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 1000, string_type, 0),  // not requested by the table
+            file_root,
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);  // only "profile" is requested
+    expect_mapping(mapper.mappings()[0], 0, "profile", 1, "_col1", file_root.type, table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2);  // id and name
+    expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "id", 1, "id", int_type, int_type);
+    expect_mapping(mapper.mappings()[0].child_mappings[1], 0, "name", 0, "name", string_type,
+                   string_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexNestedStructDoesNotUseChildOrdinalIdentifier) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    // This is the dangerous variant of the previous case: the nested integer identifiers happen
+    // to be valid child ordinals. BY_INDEX must still ignore them below the top-level root.
+    auto table_root = struct_col("profile", 1,
+                                 {
+                                         field_id_col("id", 0, int_type),
+                                         field_id_col("name", 1, string_type),
+                                 });
+    // If the implementation uses child ordinal matching, id/name will be swapped here.
+    auto file_root = struct_name_col("_col1",
+                                     {
+                                             name_col("name", string_type, 0),
+                                             name_col("id", int_type, 1),
+                                     },
+                                     1);
+    const std::vector<ColumnDefinition> table_schema = {table_root};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 1000, string_type, 0),  // not requested by the table
+            file_root,
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);  // only "profile" is requested
+    expect_mapping(mapper.mappings()[0], 0, "profile", 1, "_col1", file_root.type, table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2);  // id and name
+    expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "id", 1, "id", int_type, int_type);
+    expect_mapping(mapper.mappings()[0].child_mappings[1], 0, "name", 0, "name", string_type,
+                   string_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexArrayElementStructChildrenMatchByName) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    // The top-level ARRAY column is selected by file position. After that, ARRAY has a single
+    // structural child, and the element STRUCT should use Hive's nested-by-name behavior.
+    auto table_element = struct_col("element", 10,
+                                    {
+                                            field_id_col("id", 100, int_type),
+                                            field_id_col("name", 101, string_type),
+                                    });
+    auto table_root = array_col("profiles", 1, table_element);
+    // Reverse the element struct children to distinguish name matching from position matching.
+    auto file_element = struct_name_col("element",
+                                        {
+                                                name_col("name", string_type, 0),
+                                                name_col("id", int_type, 1),
+                                        },
+                                        0);
+    auto file_root = array_col("_col1", 1001, file_element, 1);
+    const std::vector<ColumnDefinition> table_schema = {table_root};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 1000, string_type, 0),  // not requested by the table
+            file_root,
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);  // only "profiles" is requested
+    expect_mapping(mapper.mappings()[0], 0, "profiles", 1, "_col1", file_root.type,
+                   table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 1);  // the single element child
+    const auto& element_mapping = mapper.mappings()[0].child_mappings[0];
+    expect_mapping(element_mapping, 0, "element", 0, "element", file_element.type,
+                   table_element.type);
+    ASSERT_EQ(element_mapping.child_mappings.size(), 2);  // id and name
+    expect_mapping(element_mapping.child_mappings[0], 0, "id", 1, "id", int_type, int_type);
+    expect_mapping(element_mapping.child_mappings[1], 0, "name", 0, "name", string_type,
+                   string_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexMapValueStructChildrenMatchByName) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const auto key_type = str();
+    // MAP key/value are structural children, so BY_INDEX should not reinterpret their nested
+    // integer identifiers as arbitrary positions. The value STRUCT then follows name matching.
+    auto table_key = field_id_col("key", 10, key_type);
+    auto table_value = struct_col("value", 11,
+                                  {
+                                          field_id_col("id", 100, int_type),
+                                          field_id_col("name", 101, string_type),
+                                  });
+    auto table_root = map_col("profiles", 1, {table_key, table_value}, key_type, table_value.type);
+    auto file_key = name_col("key", key_type, 0);
+    // Reverse value struct children. A positional nested match would produce name/id swapped.
+    auto file_value = struct_name_col("value",
+                                      {
+                                              name_col("name", string_type, 0),
+                                              name_col("id", int_type, 1),
+                                      },
+                                      1);
+    auto file_root = map_col("_col1", 1001, {file_key, file_value}, key_type, file_value.type, 1);
+    const std::vector<ColumnDefinition> table_schema = {table_root};
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 1000, string_type, 0),  // not requested by the table
+            file_root,
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+
+    ASSERT_EQ(mapper.mappings().size(), 1);  // only "profiles" is requested
+    expect_mapping(mapper.mappings()[0], 0, "profiles", 1, "_col1", file_root.type,
+                   table_root.type);
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2);  // key and value
+    expect_mapping(mapper.mappings()[0].child_mappings[0], 0, "key", 0, "key", key_type, key_type);
+    const auto& value_mapping = mapper.mappings()[0].child_mappings[1];
+    expect_mapping(value_mapping, 0, "value", 1, "value", file_value.type, table_value.type);
+    ASSERT_EQ(value_mapping.child_mappings.size(), 2);  // id and name
+    expect_mapping(value_mapping.child_mappings[0], 0, "id", 1, "id", int_type, int_type);
+    expect_mapping(value_mapping.child_mappings[1], 0, "name", 0, "name", string_type, string_type);
+}
+
+TEST(ColumnMapperCreateMappingTest,
+     ByIndexPartitionColumnsTakeConstantAndDoNotConsumeFilePosition) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto partition = name_col("dt", string_type);
+    partition.is_partition_key = true;  // "dt" has no counterpart in file_schema
+    const std::vector<ColumnDefinition> table_schema = {
+            partition,
+            position_col("user_id", 0, int_type),  // still file position 0 despite "dt" first
+            position_col("score", 1, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+            field_id_col("_col1", 101, int_type, 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema,
+                                      {{"dt", Field::create_field<TYPE_STRING>("2026-06-11")}},
+                                      file_schema)
+                        .ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);  // one mapping per table_schema entry
+    expect_constant(mapper, mapper.mappings()[0], 0, string_type);  // "dt" becomes a constant
+    expect_mapping(mapper.mappings()[1], 1, "user_id", 0, "_col0", int_type, int_type);
+    expect_mapping(mapper.mappings()[2], 2, "score", 1, "_col1", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexOutOfRangeFallsBackToDefaultOrMissing) {
+    const auto int_type = i32(); // positions 5 and 99 below exceed the two-column file
+    auto with_default = position_col("extra_default", 5, int_type);
+    const auto literal_expr =
+            VExprContext::create_shared(literal(int_type, Field::create_field<TYPE_INT>(42)));
+    with_default.default_expr = literal_expr; // literal 42 serves as the column default
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 0, int_type),
+            with_default,
+            position_col("extra_missing", 99, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+            field_id_col("_col1", 101, int_type, 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type);
+    expect_constant(mapper, mapper.mappings()[1], 1, int_type); // column with a default expr
+    EXPECT_EQ(mapper.mappings()[1].default_expr, literal_expr);
+    expect_missing(mapper.mappings()[2]); // column without a default expr
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexMissingIdentifierFallsBackToDefaultOrMissing) {
+    const auto int_type = i32(); // presumably name_col sets no file position; confirm
+    auto with_default = name_col("extra_default", int_type);
+    const auto literal_expr =
+            VExprContext::create_shared(literal(int_type, Field::create_field<TYPE_INT>(42)));
+    with_default.default_expr = literal_expr; // literal 42 serves as the column default
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 0, int_type),
+            with_default,
+            name_col("extra_missing", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type);
+    expect_constant(mapper, mapper.mappings()[1], 1, int_type); // column with a default expr
+    EXPECT_EQ(mapper.mappings()[1].default_expr, literal_expr);
+    expect_missing(mapper.mappings()[2]); // column without a default expr
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexOutOfRangeFallsBackToMissing) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 0, int_type),
+            position_col("b", 5, int_type), // the file schema below has a single column
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    const auto status = mapper.create_mapping(table_schema, {}, file_schema);
+    ASSERT_TRUE(status.ok()) << status.to_string(); // create_mapping must still succeed
+
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type);
+    expect_missing(mapper.mappings()[1]); // "b" carries no default expr in this test
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexIgnoresExtraFileColumns) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 0, int_type), // the only table column; the file has three
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_col0", 100, int_type, 0),
+            field_id_col("_col1", 101, int_type, 1),
+            field_id_col("_col2", 102, int_type, 2),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1); // _col1 and _col2 yield no mapping
+    expect_mapping(mapper.mappings()[0], 0, "a", 0, "_col0", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, ByIndexIgnoresFileColumnNames) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            position_col("a", 1, int_type), // shares its name with the first file column
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("a", 100, int_type, 10),
+            field_id_col("b", 101, int_type, 20),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_INDEX});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1); // expected match is file column "b", not "a"
+    expect_mapping(mapper.mappings()[0], 0, "a", 20, "b", int_type, int_type);
+}
+
+TEST(ColumnMapperCreateMappingTest, MissingColumnFallsBackToMissingMapping) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    const auto status = mapper.create_mapping({name_col("missing", i32())}, {},
+                                              {name_col("present", i32(), 0)});
+    ASSERT_TRUE(status.ok()) << status.to_string(); // an unmatched name is not an error
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_missing(mapper.mappings()[0]); // the file only offers "present"
+}
+
+// ----------------------------------------------------------------------
+// L1 constants and virtual columns.
+// These tests verify non-file-backed mappings before TableReader materializes
+// their final values.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperConstantTest, PartitionDefaultAndVirtualColumnsUseDedicatedBranches) {
+    auto partition_column = name_col("dt", str());
+    partition_column.is_partition_key = true; // its value comes from partition_values below
+
+    auto default_column = name_col("new_value", i32());
+    default_column.default_expr =
+            VExprContext::create_shared(literal(i32(), Field::create_field<TYPE_INT>(42)));
+
+    auto row_id_column = name_col("_row_id", make_nullable(i64()));
+    auto sequence_column = name_col("_last_updated_sequence_number", make_nullable(i64()));
+    auto iceberg_rowid_column = name_col(BeConsts::ICEBERG_ROWID_COL, str());
+
+    const std::vector<ColumnDefinition> table_schema = {
+            partition_column, default_column, row_id_column, sequence_column, iceberg_rowid_column};
+    const std::map<std::string, Field> partition_values = {
+            {"dt", Field::create_field<TYPE_STRING>("2026-06-11")},
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, partition_values, {}).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 5); // file schema was empty; one mapping per column
+    expect_constant(mapper, mapper.mappings()[0], 0, str());
+    expect_constant(mapper, mapper.mappings()[1], 1, i32());
+    EXPECT_EQ(mapper.mappings()[2].virtual_column_type, TableVirtualColumnType::ROW_ID);
+    EXPECT_EQ(mapper.mappings()[3].virtual_column_type,
+              TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER);
+    EXPECT_EQ(mapper.mappings()[4].virtual_column_type, TableVirtualColumnType::ICEBERG_ROWID);
+}
+
+TEST(ColumnMapperConstantTest, PhysicalRowLineageFiltersStayFinalizeOnly) {
+    auto row_id_column = name_col("_row_id", make_nullable(i64()));
+    auto sequence_column = name_col("_last_updated_sequence_number", make_nullable(i64()));
+    const std::vector<ColumnDefinition> table_schema = {row_id_column, sequence_column};
+    const std::vector<ColumnDefinition> file_schema = { // both names also exist in the file
+            name_col("_row_id", make_nullable(i64()), 2147483540),
+            name_col("_last_updated_sequence_number", make_nullable(i64()), 2147483539),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    EXPECT_EQ(mapper.mappings()[0].virtual_column_type, TableVirtualColumnType::ROW_ID);
+    EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::FINALIZE_ONLY);
+    EXPECT_EQ(mapper.mappings()[1].virtual_column_type,
+              TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER);
+    EXPECT_EQ(mapper.mappings()[1].filter_conversion, FilterConversionType::FINALIZE_ONLY);
+
+    auto row_id_filter =
+            binary_predicate(TExprOpcode::EQ, table_slot(0, 0, make_nullable(i64()), "_row_id"),
+                             literal(i64(), Field::create_field<TYPE_BIGINT>(1001)));
+    auto sequence_filter = binary_predicate(
+            TExprOpcode::EQ,
+            table_slot(1, 1, make_nullable(i64()), "_last_updated_sequence_number"),
+            literal(i64(), Field::create_field<TYPE_BIGINT>(77)));
+    TableFilter row_id_table_filter {.conjunct = VExprContext::create_shared(row_id_filter),
+                                     .global_indices = {GlobalIndex(0)}};
+    TableFilter sequence_table_filter {.conjunct = VExprContext::create_shared(sequence_filter),
+                                       .global_indices = {GlobalIndex(1)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({row_id_table_filter, sequence_table_filter}, {},
+                                           table_schema, &request)
+                        .ok());
+
+    EXPECT_TRUE(request.conjuncts.empty()); // neither equality filter reaches the file request
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_EQ(projection_ids(request.non_predicate_columns), // both columns are still projected
+              std::vector<int32_t>({2147483540, 2147483539}));
+}
+
+TEST(ColumnMapperConstantTest, MissingRowLineageDefaultExprStillUsesVirtualMapping) {
+    auto id_column = field_id_col("id", 1, make_nullable(i32()));
+    auto row_id_column = field_id_col("renamed_row_id", 2147483540, make_nullable(i64()));
+    row_id_column.default_expr = VExprContext::create_shared(
+            literal(make_nullable(i64()), Field::create_field<TYPE_BIGINT>(0)));
+    auto sequence_column =
+            field_id_col("renamed_last_updated_sequence_number", 2147483539, make_nullable(i64()));
+    sequence_column.default_expr = VExprContext::create_shared(
+            literal(make_nullable(i64()), Field::create_field<TYPE_BIGINT>(0)));
+
+    const std::vector<ColumnDefinition> table_schema = {id_column, row_id_column, sequence_column};
+    const std::vector<ColumnDefinition> file_schema = { // holds neither row-lineage field id
+            field_id_col("id", 1, make_nullable(i32()), 0),
+            field_id_col("name", 2, make_nullable(str()), 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 3);
+    expect_mapping(mapper.mappings()[0], 0, "id", 0, "id", make_nullable(i32()),
+                   make_nullable(i32()));
+    EXPECT_EQ(mapper.mappings()[1].virtual_column_type, TableVirtualColumnType::ROW_ID);
+    EXPECT_FALSE(mapper.mappings()[1].constant_index.has_value());
+    EXPECT_EQ(mapper.mappings()[2].virtual_column_type,
+              TableVirtualColumnType::LAST_UPDATED_SEQUENCE_NUMBER);
+    EXPECT_FALSE(mapper.mappings()[2].constant_index.has_value());
+    EXPECT_TRUE(mapper.constant_map().empty()); // the default exprs produced no constants
+}
+
+TEST(ColumnMapperConstantTest, ByFieldIdDoesNotTreatSameNameDifferentIdAsRowLineage) {
+    const std::vector<ColumnDefinition> table_schema = { // lineage names, field ids 100 and 101
+            field_id_col("_row_id", 100, make_nullable(i64())),
+            field_id_col("_last_updated_sequence_number", 101, make_nullable(i64())),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("_row_id", 100, make_nullable(i64()), 0),
+            field_id_col("_last_updated_sequence_number", 101, make_nullable(i64()), 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 2); // both are expected to map as plain file columns
+    expect_mapping(mapper.mappings()[0], 0, "_row_id", 0, "_row_id", make_nullable(i64()),
+                   make_nullable(i64()));
+    EXPECT_EQ(mapper.mappings()[0].virtual_column_type, TableVirtualColumnType::INVALID);
+    EXPECT_EQ(mapper.mappings()[0].filter_conversion, FilterConversionType::COPY_DIRECTLY);
+    expect_mapping(mapper.mappings()[1], 1, "_last_updated_sequence_number", 1,
+                   "_last_updated_sequence_number", make_nullable(i64()), make_nullable(i64()));
+    EXPECT_EQ(mapper.mappings()[1].virtual_column_type, TableVirtualColumnType::INVALID);
+    EXPECT_EQ(mapper.mappings()[1].filter_conversion, FilterConversionType::COPY_DIRECTLY);
+}
+
+TEST(ColumnMapperConstantTest, PartitionAliasResolvesRenamedValue) {
+    auto partition_column = name_col("current_dt", str());
+    partition_column.name_mapping = {"legacy_dt"}; // the value below uses this alias as key
+    partition_column.is_partition_key = true;
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(
+                              {partition_column},
+                              {{"legacy_dt", Field::create_field<TYPE_STRING>("2026-06-11")}}, {})
+                        .ok());
+
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    expect_constant(mapper, mapper.mappings()[0], 0, str()); // checked as a constant mapping
+}
+
+TEST(ColumnMapperConstantTest, PartitionConstantFilterEntryDoesNotReadFileColumns) {
+    auto partition_column = name_col("part", i32());
+    partition_column.is_partition_key = true; // "part" is given the value 7 below
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({partition_column},
+                                      {{"part", Field::create_field<TYPE_INT>(7)}}, {})
+                        .ok());
+
+    TableFilter filter {
+            .conjunct = VExprContext::create_shared(int_gt(table_slot(0, 0, i32(), "part"), 1)),
+            .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request; // every file-facing field is asserted to stay empty
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {partition_column}, &request).ok());
+
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).constant_index(),
+              *mapper.mappings()[0].constant_index);
+    EXPECT_TRUE(request.local_positions.empty());
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    EXPECT_TRUE(request.conjuncts.empty());
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+TEST(ColumnMapperConstantTest, DefaultConstantFilterEntryUsesDefaultExpression) {
+    auto default_column = name_col("new_value", i32());
+    default_column.default_expr =
+            VExprContext::create_shared(literal(i32(), Field::create_field<TYPE_INT>(42)));
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({default_column}, {}, {}).ok()); // no file columns
+
+    TableFilter filter {.conjunct = VExprContext::create_shared(
+                                int_gt(table_slot(0, 0, i32(), "new_value"), 1)),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {default_column}, &request).ok());
+
+    ASSERT_EQ(mapper.filter_entries().size(), 1); // entry is checked against the default expr
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant());
+    const auto constant_index = mapper.filter_entries().at(GlobalIndex(0)).constant_index();
+    EXPECT_EQ(constant_index, *mapper.mappings()[0].constant_index);
+    EXPECT_EQ(mapper.constant_map().get(constant_index).expr, default_column.default_expr);
+    EXPECT_TRUE(request.local_positions.empty());
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    EXPECT_TRUE(request.conjuncts.empty());
+}
+
+TEST(ColumnMapperConstantTest, MixedConstantAndFileFilterKeepsOnlyFileScanColumn) {
+    auto partition_column = name_col("part", i32());
+    partition_column.is_partition_key = true;
+    const auto file_column = name_col("score", i32(), 3); // reused in both schemas below
+    const std::vector<ColumnDefinition> table_schema = {partition_column, file_column};
+    const std::vector<ColumnDefinition> file_schema = {file_column};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {{"part", Field::create_field<TYPE_INT>(7)}},
+                                      file_schema)
+                        .ok());
+
+    TableFilter constant_filter {
+            .conjunct = VExprContext::create_shared(int_gt(table_slot(0, 0, i32(), "part"), 1)),
+            .global_indices = {GlobalIndex(0)}};
+    TableFilter file_filter {
+            .conjunct = VExprContext::create_shared(int_gt(table_slot(1, 1, i32(), "score"), 10)),
+            .global_indices = {GlobalIndex(1)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({constant_filter, file_filter}, {}, table_schema, &request)
+                    .ok());
+
+    ASSERT_EQ(mapper.filter_entries().size(), 2); // one constant entry, one local entry
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant());
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(1)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(1)).local_index(), LocalIndex(0));
+    ASSERT_EQ(request.predicate_columns.size(), 1); // only "score" (column id 3) is listed
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(3));
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+}
+
+// ----------------------------------------------------------------------
+// L1 direct filter localization tests.
+// These tests call localize_filters() directly to pin the core interface
+// contract apart from create_scan_request() initialization.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperLocalizeFiltersTest, VisibleLocalFilterAddsPredicateColumnAndConjunct) {
+    const auto int_type = i32(); // single column "id"; the file side uses column id 7
+    const std::vector<ColumnDefinition> table_schema = {name_col("id", int_type)};
+    const std::vector<ColumnDefinition> file_schema = {name_col("id", int_type, 7)};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(11, 0, int_type, "id")),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request; // starts empty; localize_filters is called on it directly
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(7));
+    ASSERT_EQ(request.local_positions.size(), 1);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(7)), LocalIndex(0));
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0));
+
+    ASSERT_EQ(request.conjuncts.size(), 1); // slot ref keeps slot id, column id, name and type
+    const auto* localized_slot = assert_cast<const VSlotRef*>(request.conjuncts[0]->root().get());
+    EXPECT_EQ(localized_slot->slot_id(), 11);
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    EXPECT_EQ(localized_slot->column_name(), "id");
+    EXPECT_TRUE(localized_slot->data_type()->equals(*int_type));
+}
+
+TEST(ColumnMapperLocalizeFiltersTest, ConstantFilterBuildsEntryWithoutFileScanColumn) {
+    auto partition_column = name_col("part", i32());
+    partition_column.is_partition_key = true; // "part" is given the value 7 below
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({partition_column},
+                                      {{"part", Field::create_field<TYPE_INT>(7)}}, {})
+                        .ok());
+
+    TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(3, 0, i32(), "part")),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request; // asserted below to receive no columns, positions or conjuncts
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok());
+
+    EXPECT_TRUE(request.predicate_columns.empty());
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    EXPECT_TRUE(request.local_positions.empty());
+    EXPECT_TRUE(request.conjuncts.empty());
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_constant());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).constant_index(),
+              mapper.mappings()[0].constant_index); // NOTE(review): optional compared as-is
+}
+
+TEST(ColumnMapperLocalizeFiltersTest, ColumnPredicatesUseOnlyExistingLocalPositions) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {name_col("id", int_type)};
+    const std::vector<ColumnDefinition> file_schema = {name_col("id", int_type, 3)};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    TableColumnPredicates predicates; // one GT predicate on "id", reused for both requests
+    predicates[GlobalIndex(0)] = {create_comparison_predicate<PredicateType::GT>(
+            0, "id", int_type, Field::create_field<TYPE_INT>(10), false)};
+
+    FileScanRequest request_without_local_position; // nothing is projected in this request
+    ASSERT_TRUE(mapper.localize_filters({}, predicates, &request_without_local_position).ok());
+    EXPECT_TRUE(request_without_local_position.column_predicate_filters.empty());
+    ASSERT_EQ(mapper.filter_entries().size(), 1);
+    EXPECT_FALSE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+
+    FileScanRequest request_with_local_position; // seeded below with column 3 at local index 0
+    request_with_local_position.non_predicate_columns.push_back(
+            LocalColumnIndex::top_level(LocalColumnId(3)));
+    request_with_local_position.local_positions.emplace(LocalColumnId(3), LocalIndex(0));
+    ASSERT_TRUE(mapper.localize_filters({}, predicates, &request_with_local_position).ok());
+
+    ASSERT_EQ(request_with_local_position.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request_with_local_position.non_predicate_columns[0].column_id(), LocalColumnId(3));
+    EXPECT_TRUE(request_with_local_position.predicate_columns.empty());
+    ASSERT_EQ(request_with_local_position.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request_with_local_position.column_predicate_filters[0].effective_file_column_id(),
+              LocalColumnId(3));
+    ASSERT_EQ(request_with_local_position.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request_with_local_position.column_predicate_filters[0].predicates[0]->type(),
+              PredicateType::GT);
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0));
+}
+
+TEST(ColumnMapperLocalizeFiltersTest, NestedFilterOnlyChildMergesIntoPredicateProjection) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = name_col("a", int_type);
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b}); // mapped schema lists only child b
+    auto full_table_struct = struct_name_col("s", {table_a, table_b});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    auto filter_expr = int_gt( // the filter reads child a through the full struct type
+            struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a"), 10);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5));
+    ASSERT_FALSE(request.predicate_columns[0].project_all_children); // explicit child list
+    EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector<int32_t>({0, 1}));
+    ASSERT_EQ(request.local_positions.size(), 1);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(5)), LocalIndex(0));
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(0));
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"a"}));
+}
+
+TEST(ColumnMapperLocalizeFiltersTest, PreservesExistingScanStateWhenAddingPredicateColumn) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = {
+            name_col("id", int_type),
+            name_col("score", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("id", int_type, 3),
+            name_col("score", int_type, 4),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    TableFilter filter {.conjunct = VExprContext::create_shared(table_slot(2, 0, int_type, "id")),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request; // pre-populated below with column 4 at local index 0
+    request.non_predicate_columns.push_back(LocalColumnIndex::top_level(LocalColumnId(4)));
+    request.local_positions.emplace(LocalColumnId(4), LocalIndex(0));
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(4));
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(3));
+    ASSERT_EQ(request.local_positions.size(), 2); // seeded entry kept; column 3 gets index 1
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(4)), LocalIndex(0));
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(3)), LocalIndex(1));
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(0)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(0)).local_index(), LocalIndex(1));
+}
+
+// ----------------------------------------------------------------------
+// L1 scan request and filter localization tests.
+// These tests assert predicate/non-predicate split, local positions, hidden
+// filter mappings, and nested predicate targets.
+// ----------------------------------------------------------------------
+
+TEST(ColumnMapperScanRequestTest, ColumnPredicatesDoNotForceRowPredicateMaterialization) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    const std::vector<ColumnDefinition> table_schema = {
+            name_col("id", int_type),
+            name_col("name", string_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            name_col("id", int_type, 0),
+            name_col("name", string_type, 1),
+    };
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    TableColumnPredicates predicates; // the only filter input; no conjuncts are passed
+    predicates[GlobalIndex(0)] = {create_comparison_predicate<PredicateType::GT>(
+            0, "id", int_type, Field::create_field<TYPE_INT>(10), false)};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, predicates, table_schema, &request).ok());
+
+    EXPECT_TRUE(request.predicate_columns.empty()); // both columns stay non-predicate
+    EXPECT_EQ(projection_ids(request.non_predicate_columns), std::vector<int32_t>({0, 1}));
+    ASSERT_EQ(request.local_positions.size(), 2);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(0)), LocalIndex(0));
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(1)), LocalIndex(1));
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(0));
+}
+
+TEST(ColumnMapperScanRequestTest, HiddenTopLevelFilterMappingUsesNameFallback) {
+    const auto int_type = i32();
+    const std::vector<ColumnDefinition> table_schema = { // "score" is not listed here
+            field_id_col("id", 1, int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            field_id_col("id", 1, int_type, 0),
+            field_id_col("score", 2, int_type, 1),
+    };
+
+    auto filter_expr = int_gt(table_slot(7, 1, int_type, "score"), 10); // filter-only column
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(1)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, file_schema).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, table_schema, &request).ok());
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(0));
+    ASSERT_EQ(request.predicate_columns.size(), 1); // file column "score" (column id 1)
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1));
+    ASSERT_TRUE(mapper.filter_entries().at(GlobalIndex(1)).is_local());
+    EXPECT_EQ(mapper.filter_entries().at(GlobalIndex(1)).local_index(), LocalIndex(1));
+}
+
+TEST(ColumnMapperScanRequestTest, StructOutputAndFilterOnlyChildAreMerged) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = name_col("a", int_type);
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b}); // output schema lists only child b
+    auto full_table_struct = struct_name_col("s", {table_a, table_b});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    auto filter_expr = int_gt( // the filter reads child a through the full struct type
+            struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a"), 10);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5));
+    ASSERT_FALSE(request.predicate_columns[0].project_all_children); // explicit child list
+    EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector<int32_t>({0, 1}));
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"a"}));
+}
+
+TEST(ColumnMapperScanRequestTest, RenamedNestedPredicateTargetsMappedFileChild) {
+    const auto int_type = i32();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, int_type); // field id 2, like file "b"
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b});
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_struct = struct_col("s", 10, {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    auto filter_expr = int_gt(
+            struct_element(table_slot(0, 0, table_struct.type, "s"), int_type, "renamed_b"), 10);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.column_predicate_filters.size(), 1); // target uses the file-side name
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({1}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"b"}));
+}
+
+TEST(ColumnMapperScanRequestTest, NestedInNullAndReverseComparisonFiltersAreMerged) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    // Scenario: four predicates on nested child `s.a` are AND-ed inside a single conjunct.
+    auto table_a = name_col("a", int_type);
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    auto full_table_struct = struct_name_col("s", {table_a, table_b});
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+    // reverse_filter below is `3 < s.a` (literal on the left); it is expected to surface as GT.
+    const auto nested_a =
+            struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "a");
+    auto in_filter =
+            in_predicate(nested_a, int_type,
+                         {Field::create_field<TYPE_INT>(5), Field::create_field<TYPE_INT>(7)});
+    auto reverse_filter = binary_predicate(
+            TExprOpcode::LT, literal(int_type, Field::create_field<TYPE_INT>(3)), nested_a);
+    auto null_filter = null_predicate(nested_a, true);
+    auto not_null_filter = null_predicate(nested_a, false);
+    auto filter_expr = compound_predicate(
+            TExprOpcode::COMPOUND_AND,
+            compound_predicate(TExprOpcode::COMPOUND_AND, in_filter, reverse_filter),
+            compound_predicate(TExprOpcode::COMPOUND_AND, null_filter, not_null_filter));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+    // All four predicates should share one column filter, in the order they were combined.
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"a"}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 4);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::IN_LIST);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[1]->type(), PredicateType::GT);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[2]->type(), PredicateType::IS_NULL);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[3]->type(),
+              PredicateType::IS_NOT_NULL);
+}
+
+TEST(ColumnMapperScanRequestTest, NestedPredicateFilterThroughSafeCast) {
+    const auto file_int_type = i32();
+    const auto table_bigint_type = i64();
+    const auto string_type = str();
+    // Scenario: file child `a` is i32; the filter compares cast_expr(s.a, i64) with a literal.
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    auto full_table_struct = std::make_shared<DataTypeStruct>(
+            DataTypes {table_bigint_type, string_type}, Strings {"a", "b"});
+
+    auto file_a = name_col("a", file_int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    const auto nested_a =
+            struct_element(table_slot(0, 0, full_table_struct, "s"), file_int_type, "a");
+    auto filter_expr =
+            binary_predicate(TExprOpcode::GT, cast_expr(nested_a, table_bigint_type),
+                             literal(table_bigint_type, Field::create_field<TYPE_BIGINT>(5)));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+    // The test name calls this cast safe: one GT filter on child path {0} is still expected.
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::GT);
+}
+
+TEST(ColumnMapperScanRequestTest, UnsafeCastDoesNotBuildNestedPredicateFilter) {
+    const auto file_bigint_type = i64();
+    const auto table_int_type = i32();
+    const auto string_type = str();
+    // Scenario: file child `a` is i64; the filter compares cast_expr(s.a, i32) with a literal.
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    auto full_table_struct = std::make_shared<DataTypeStruct>(
+            DataTypes {table_int_type, string_type}, Strings {"a", "b"});
+
+    auto file_a = name_col("a", file_bigint_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    const auto nested_a =
+            struct_element(table_slot(0, 0, full_table_struct, "s"), file_bigint_type, "a");
+    auto filter_expr = binary_predicate(TExprOpcode::GT, cast_expr(nested_a, table_int_type),
+                                        literal(table_int_type, Field::create_field<TYPE_INT>(5)));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+    // No column filter is expected; `s` is read as a predicate column with children {0, 1}.
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_EQ(projection_ids(request.predicate_columns[0].children), std::vector<int32_t>({0, 1}));
+}
+
+TEST(ColumnMapperScanRequestTest, DeepNestedPredicateTargetsLeafPath) {
+    const auto id_type = i32();
+    const auto name_type = str();
+    const auto string_type = str();
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    // The filter-side struct type is s{a{id, n}, b}, although only s.b is a projected column.
+    auto full_table_inner_type =
+            std::make_shared<DataTypeStruct>(DataTypes {id_type, name_type}, Strings {"id", "n"});
+    auto full_table_struct_type = std::make_shared<DataTypeStruct>(
+            DataTypes {full_table_inner_type, string_type}, Strings {"a", "b"});
+
+    auto file_id = name_col("id", id_type, 0);
+    auto file_name = name_col("n", name_type, 1);
+    auto file_a = struct_name_col("a", {file_id, file_name}, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+    // Scenario: IN-list predicate on the two-level path s.a.id.
+    const auto nested_id =
+            struct_element(struct_element(table_slot(0, 0, full_table_struct_type, "s"),
+                                          full_table_inner_type, "a"),
+                           id_type, "id");
+    auto filter_expr =
+            in_predicate(nested_id, id_type,
+                         {Field::create_field<TYPE_INT>(5), Field::create_field<TYPE_INT>(7)});
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+    // Expect one IN_LIST filter with child id path {0, 0} and target names {"a", "id"}.
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(5));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0, 0}));
+    EXPECT_EQ(target_names(request.column_predicate_filters[0].target.struct_target.get()),
+              std::vector<std::string>({"a", "id"}));
+    ASSERT_EQ(request.column_predicate_filters[0].predicates.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].predicates[0]->type(), PredicateType::IN_LIST);
+}
+
+TEST(ColumnMapperScanRequestTest, ArrayStructProjectionPrunesElementChildren) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_b = name_col("b", string_type);
+    auto table_element = struct_name_col("element", {table_b});
+    auto table_array = array_col("items", -1, table_element);
+    table_array.identifier = Field::create_field<TYPE_STRING>("items");
+    set_name_identifiers(&table_array, -1);
+    // Scenario: the table element struct lists only `b`; the file element struct holds `a, b`.
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_element = struct_name_col("element", {file_a, file_b}, 0);
+    auto file_array = array_col("items", -1, file_element, 4);
+    file_array.identifier = Field::create_field<TYPE_STRING>("items");
+    set_name_identifiers(&file_array, 4);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_array}, {}, {file_array}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_array}, &request).ok());
+    // Expect column 4 -> child id 0 (element) -> child id 1 (`b`), one entry at each level.
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(4));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 0);
+    ASSERT_EQ(projection.children[0].children.size(), 1);
+    EXPECT_EQ(projection.children[0].children[0].local_id(), 1);
+    // The mapped file type should shrink to an element struct with the single field `b`.
+    const auto* mapped_array = assert_cast<const DataTypeArray*>(
+            remove_nullable(mapper.mappings()[0].file_type).get());
+    const auto* mapped_element = assert_cast<const DataTypeStruct*>(
+            remove_nullable(mapped_array->get_nested_type()).get());
+    ASSERT_EQ(mapped_element->get_elements().size(), 1);
+    EXPECT_EQ(mapped_element->get_element_name(0), "b");
+}
+
+TEST(ColumnMapperScanRequestTest, MapValueStructProjectionPrunesValueChildren) {
+    const auto key_type = str();
+    const auto int_type = i32();
+    const auto string_type = str();
+    // Scenario: the table value struct lists only `b`; the file value struct holds `a, b`.
+    auto table_value_b = name_col("b", string_type);
+    auto table_value = struct_name_col("value", {table_value_b});
+    auto table_map = map_col("m", -1, {table_value}, key_type, table_value.type);
+    table_map.identifier = Field::create_field<TYPE_STRING>("m");
+    set_name_identifiers(&table_map, -1);
+
+    auto file_key = name_col("key", key_type, 0);
+    auto file_value_a = name_col("a", int_type, 0);
+    auto file_value_b = name_col("b", string_type, 1);
+    auto file_value = struct_name_col("value", {file_value_a, file_value_b}, 1);
+    auto file_map = map_col("m", -1, {file_key, file_value}, key_type, file_value.type, 6);
+    file_map.identifier = Field::create_field<TYPE_STRING>("m");
+    set_name_identifiers(&file_map, 6);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_map}, &request).ok());
+    // Expect column 6 -> child id 1 (value) -> child id 1 (`b`), one entry at each level.
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(6));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 1);
+    ASSERT_EQ(projection.children[0].children.size(), 1);
+    EXPECT_EQ(projection.children[0].children[0].local_id(), 1);
+    // The mapped file type should keep only field `b` in the map's value struct.
+    const auto* mapped_map =
+            assert_cast<const DataTypeMap*>(remove_nullable(mapper.mappings()[0].file_type).get());
+    const auto* mapped_value =
+            assert_cast<const DataTypeStruct*>(remove_nullable(mapped_map->get_value_type()).get());
+    ASSERT_EQ(mapped_value->get_elements().size(), 1);
+    EXPECT_EQ(mapped_value->get_element_name(0), "b");
+}
+
+// Scenario: a table struct projects only child `b`, while the file struct stores `a,b`.
+// BY_NAME mapping should read only the physical child `b` and rebuild the mapped file type to the
+// projected struct shape.
+TEST(ColumnMapperScanRequestTest, StructProjectionPrunesChildrenByName) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_b = name_col("b", string_type);
+    auto table_struct = struct_name_col("s", {table_b});
+    set_name_identifiers(&table_struct, 0);
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_struct = struct_name_col("s", {file_a, file_b}, 0);
+    set_name_identifiers(&file_struct, 0);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok());
+    // Only file child `b` (local id 1) should be requested under struct column 0.
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(0));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 1);
+    // The mapping's file type should now be a one-field struct containing `b`.
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    const auto* projected_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(mapper.mappings()[0].file_type).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 1);
+    EXPECT_EQ(projected_type->get_element_name(0), "b");
+}
+
+// Scenario: a row filter reaches a struct child through an array wrapper
+// (`items.item.a > 5`). The nested predicate filter path only supports direct struct paths, so
+// the mapper keeps this as a row predicate and reads the full array root for predicate evaluation.
+TEST(ColumnMapperScanRequestTest, ArrayWrapperDoesNotBuildNestedPredicateFilter) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto file_a = name_col("a", int_type, 0);
+    auto file_b = name_col("b", string_type, 1);
+    auto file_element = struct_name_col("item", {file_a, file_b}, 0);
+    auto file_array = array_col("items", -1, file_element, 0);
+    set_name_identifiers(&file_array, 0);
+    // The table schema is a copy of the file schema; only the filter shape is under test.
+    auto table_array = file_array;
+
+    const auto item_type = file_element.type;
+    auto item_expr = struct_element(table_slot(0, 0, table_array.type, "items"), item_type, "item");
+    auto filter_expr = int_gt(struct_element(item_expr, int_type, "a"), 5);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_array}, {}, {file_array}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_array}, &request).ok());
+    // Expect a whole-root predicate read (project_all_children) and no column predicate filter.
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(0));
+    EXPECT_TRUE(request.predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.predicate_columns[0].children.empty());
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// Scenario: a map value struct projects child `b`, while a row filter reads value child `a`.
+// The filter is too complex to become a file-local nested predicate, but the predicate projection
+// must replace the output projection for the same map root and contain both physical value children.
+TEST(ColumnMapperScanRequestTest, MapFilterOnlyValueChildMergesWithOutputProjection) {
+    const auto key_type = i32();
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_value_b = name_col("b", string_type);
+    auto table_value = struct_name_col("value", {table_value_b});
+    auto table_map = map_col("m", -1, {table_value}, key_type, table_value.type);
+    set_name_identifiers(&table_map, 0);
+
+    auto file_key = name_col("key", key_type, 0);
+    auto file_value_a = name_col("a", int_type, 0);
+    auto file_value_b = name_col("b", string_type, 1);
+    auto file_value = struct_name_col("value", {file_value_a, file_value_b}, 1);
+    auto file_map = map_col("m", -1, {file_key, file_value}, key_type, file_value.type, 0);
+    set_name_identifiers(&file_map, 0);
+    // The filter is typed against the full value struct {a, b}, not the projected {b} shape.
+    auto full_value_type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    auto full_map_type = std::make_shared<DataTypeMap>(key_type, full_value_type);
+    auto value_expr =
+            struct_element(table_slot(0, 0, full_map_type, "m"), full_value_type, "value");
+    auto filter_expr = int_gt(struct_element(value_expr, int_type, "a"), 5);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping({table_map}, {}, {file_map}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_map}, &request).ok());
+    // Expect one predicate projection: value (id 1) carrying both children {0, 1}.
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    const auto& projection = request.predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(0));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 1);
+    EXPECT_EQ(projection.children[0].local_id(), 1);
+    EXPECT_EQ(projection_ids(projection.children[0].children), std::vector<int32_t>({0, 1}));
+    EXPECT_TRUE(request.column_predicate_filters.empty());
+}
+
+// Scenario: when projected struct children are an in-order prefix of the file struct, the mapper can
+// read those physical children directly without rebuilding the file-side complex type.
+TEST(ColumnMapperScanRequestTest, MatchingProjectedStructDoesNotNeedComplexRematerialize) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_b = field_id_col("b", 2, string_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_b});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, string_type, 1);
+    auto file_c = field_id_col("c", 3, int_type, 2);
+    auto file_struct = struct_col("s", 10, {file_a, file_b, file_c}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+    // File child `c` (field id 3) has no table counterpart, yet the mapping should be trivial.
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok());
+    // Children {0, 1} are projected explicitly and is_trivial must survive the scan request.
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_FALSE(projection.project_all_children);
+    EXPECT_EQ(projection_ids(projection.children), std::vector<int32_t>({0, 1}));
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: Iceberg field-id mapping sees a renamed struct child, but the physical child order and
+// types still match, so projection remains a full physical read instead of rebuilding a new type.
+TEST(ColumnMapperScanRequestTest, RenameOnlyProjectedStructDoesNotRebuildFileProjection) {
+    const auto int_type = i32();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_struct = struct_col("s", 10, {file_a, file_b}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+    // The rename shows up only in child_mappings: table name `renamed_b`, file name `b`.
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+    EXPECT_EQ(mapper.mappings()[0].projected_file_children.size(),
+              mapper.mappings()[0].original_file_children.size());
+    ASSERT_EQ(mapper.mappings()[0].child_mappings.size(), 2);
+    EXPECT_EQ(mapper.mappings()[0].child_mappings[1].table_column_name, "renamed_b");
+    EXPECT_EQ(mapper.mappings()[0].child_mappings[1].file_column_name, "b");
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {table_struct}, &request).ok());
+    // The struct is read whole (project_all_children, no child list) and stays trivial.
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_TRUE(request.non_predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.non_predicate_columns[0].children.empty());
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: a row filter references an unprojected struct child, so the predicate projection is
+// merged with the output projection and the mapper rebuilds the projected file struct type.
+TEST(ColumnMapperScanRequestTest, PredicateProjectionRebuildsProjectedStructFileType) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_b = field_id_col("b", 2, string_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_b});
+    auto full_table_c = field_id_col("c", 3, int_type);
+    auto full_table_struct = struct_col("s", 10, {table_a, table_b, full_table_c});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, string_type, 1);
+    auto file_c = field_id_col("c", 3, int_type, 2);
+    auto file_struct = struct_col("s", 10, {file_a, file_b, file_c}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+    // The filter reads `s.c`, present in the file struct but missing from the projection.
+    auto filter_expr =
+            int_gt(struct_element(table_slot(0, 0, full_table_struct.type, "s"), int_type, "c"), 0);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_TRUE(request.non_predicate_columns.empty());
+    const auto& projection = request.predicate_columns[0];
+    EXPECT_FALSE(projection.project_all_children);
+    EXPECT_EQ(projection_ids(projection.children), std::vector<int32_t>({0, 1, 2}));
+    // The mapped file type should list a, b, c and the mapping should stop being trivial.
+    const auto* mapped_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(mapper.mappings()[0].file_type).get());
+    ASSERT_EQ(mapped_type->get_elements().size(), 3);
+    EXPECT_EQ(mapped_type->get_element_name(0), "a");
+    EXPECT_EQ(mapped_type->get_element_name(1), "b");
+    EXPECT_EQ(mapped_type->get_element_name(2), "c");
+    EXPECT_FALSE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: a filter references a top-level column that is not projected by the query; the mapper
+// creates a hidden filter mapping without adding that hidden column to visible table mappings.
+TEST(ColumnMapperScanRequestTest, PredicateOnlyTopLevelColumnUsesHiddenMapping) {
+    const auto int_type = i32();
+
+    auto table_id = field_id_col("id", 0, int_type);
+    auto table_c = field_id_col("c", 11, int_type);
+    auto table_struct = struct_col("s", 10, {table_c});
+
+    auto file_id = field_id_col("id", 0, int_type, 0);
+    auto file_c = field_id_col("c", 11, int_type, 0);
+    auto file_struct = struct_col("s", 10, {file_c}, 10);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_id}, {}, {file_id, file_struct}).ok());
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_EQ(mapper.mappings()[0].table_column_name, "id");
+    // The filter carries GlobalIndex(1) and reads `s.c`; `s` is not a mapped table column.
+    auto filter_expr =
+            int_gt(struct_element(table_slot(7, 1, table_struct.type, "s"), int_type, "c"), 0);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(1)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_id}, &request).ok());
+    // Building the request must leave the visible mappings untouched: still only `id`.
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_EQ(mapper.mappings()[0].table_column_name, "id");
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(0));
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(10));
+    EXPECT_TRUE(request.predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.predicate_columns[0].children.empty());
+    // The conjunct is kept, and a column filter targets file column 10 at child path {0}.
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(10));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({0}));
+}
+
+// Scenario: a nested predicate targets a table-side renamed struct field; both predicate pruning and
+// scan projection must resolve that field to the old physical file child.
+TEST(ColumnMapperScanRequestTest, NestedPredicateProjectionUsesMappedRenamedChild) {
+    const auto int_type = i32();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_struct = struct_col("s", 10, {file_a, file_b}, 10);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+    // The filter uses the table-side child name `renamed_b` (field id 2, file name `b`).
+    auto filter_expr = int_gt(
+            struct_element(table_slot(0, 0, table_struct.type, "s"), int_type, "renamed_b"), 0);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+    // Expect filter child path {1}; the struct itself is still read with all children.
+    ASSERT_EQ(request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_column_id(), LocalColumnId(10));
+    EXPECT_EQ(request.column_predicate_filters[0].effective_file_child_id_path(),
+              std::vector<int32_t>({1}));
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_TRUE(request.predicate_columns[0].project_all_children);
+    EXPECT_TRUE(request.predicate_columns[0].children.empty());
+}
+
+// Scenario: element_at(struct, 'table_name') in a row filter is localized to the physical file
+// child name, matching the struct_element rewrite and nested predicate filter resolution paths.
+TEST(ColumnMapperScanRequestTest,
+     FileLocalElementAtConjunctUsesFileChildNameForRenamedStructField) {
+    const auto int_type = i32();
+
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b});
+
+    auto file_a = field_id_col("a", 1, int_type, 0);
+    auto file_b = field_id_col("b", 2, int_type, 1);
+    auto file_struct = struct_col("s", 10, {file_a, file_b}, 10);
+    // The filter is int_gt(element_at(s, "renamed_b"), 0), written with the table-side name.
+    auto child_expr = element_at(table_slot(0, 0, table_struct.type, table_struct.name), int_type,
+                                 "renamed_b");
+    auto filter_expr = int_gt(child_expr, 0);
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+    // The localized conjunct should still be element_at over slot `s` with column id 0.
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    const auto& localized_child = request.conjuncts[0]->root()->children()[0];
+    EXPECT_EQ(localized_child->expr_name(), "element_at");
+    const auto* localized_slot = assert_cast<const VSlotRef*>(localized_child->children()[0].get());
+    EXPECT_EQ(localized_slot->column_name(), "s");
+    EXPECT_EQ(localized_slot->column_id(), 0);
+    // Its string selector literal, however, must now carry the file child name `b`.
+    const auto* localized_literal =
+            assert_cast<const VLiteral*>(localized_child->children()[1].get());
+    Field localized_field;
+    localized_literal->get_column_ptr()->get(0, localized_field);
+    ASSERT_EQ(localized_field.get_type(), TYPE_STRING);
+    EXPECT_EQ(std::string(localized_field.as_string_view()), "b");
+}
+
+// Scenario: nested element_at(struct, name) localization rewrites both selector names and
+// intermediate return types. The outer selector must be prepared against the projected file child
+// struct, not the table child struct or the full historical file child struct.
+TEST(ColumnMapperScanRequestTest, NestedElementAtConjunctUsesFileChildTypeForRenamedLeaf) {
+    const auto int_type = i32();
+    const auto string_type = str();
+
+    auto table_new_aa = field_id_col("new_aa", 23, int_type);
+    auto table_bb = field_id_col("bb", 24, string_type);
+    auto table_new_a = struct_col("new_a", 20, {table_new_aa, table_bb});
+    auto table_struct = struct_col("struct_column2", 19, {table_new_a});
+
+    auto file_aa = field_id_col("aa", 23, int_type, 0);
+    auto file_bb = field_id_col("bb", 24, string_type, 1);
+    auto file_new_a = struct_col("new_a", 20, {file_aa, file_bb}, 0);
+    auto file_struct = struct_col("struct_column2", 19, {file_new_a}, 10);
+    // Filter: element_at(element_at(struct_column2, "new_a"), "new_aa") == 50 (table names).
+    const auto table_slot_expr = table_slot(0, 0, table_struct.type, "struct_column2");
+    const auto table_parent_expr = element_at(table_slot_expr, table_new_a.type, "new_a");
+    const auto table_leaf_expr = element_at(table_parent_expr, int_type, "new_aa");
+    auto filter_expr = binary_predicate(TExprOpcode::EQ, table_leaf_expr,
+                                        literal(int_type, Field::create_field<TYPE_INT>(50)));
+    TableFilter filter {.conjunct = VExprContext::create_shared(filter_expr),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({table_struct}, {}, {file_struct}).ok());
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {table_struct}, &request).ok());
+    ASSERT_EQ(request.conjuncts.size(), 1);
+
+    const auto& localized_leaf = request.conjuncts[0]->root()->children()[0];
+    ASSERT_EQ(localized_leaf->expr_name(), "element_at");
+    const auto& localized_parent = localized_leaf->children()[0];
+    ASSERT_EQ(localized_parent->expr_name(), "element_at");
+    // The leaf selector should be rewritten from `new_aa` to the file child name `aa`.
+    const auto* localized_leaf_selector =
+            assert_cast<const VLiteral*>(localized_leaf->children()[1].get());
+    Field localized_leaf_field;
+    localized_leaf_selector->get_column_ptr()->get(0, localized_leaf_field);
+    ASSERT_EQ(localized_leaf_field.get_type(), TYPE_STRING);
+    EXPECT_EQ(std::string(localized_leaf_field.as_string_view()), "aa");
+    // The inner element_at should return a struct typed with file child names `aa`, `bb`.
+    const auto* localized_parent_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(localized_parent->data_type()).get());
+    ASSERT_EQ(localized_parent_type->get_elements().size(), 2);
+    EXPECT_EQ(localized_parent_type->get_element_name(0), "aa");
+    EXPECT_EQ(localized_parent_type->get_element_name(1), "bb");
+}
+
+// Scenario: the output projection and the row filter touch different children of one struct.
+// For `SELECT element_at(s, 'c') WHERE element_at(element_at(s, 'b'), 'cc') LIKE ...` the scan
+// reads file children `b.cc` and `c`, so the file-local conjunct must be rewritten against the
+// merged scan projection: the localized inner `element_at(s, 'b')` yields `Struct(cc)`, not the
+// full file child `Struct(cc, new_dd)`.
+TEST(ColumnMapperScanRequestTest, NestedElementAtConjunctUsesMergedScanProjectionChildType) {
+    const auto str_type = str();
+    const auto i32_type = i32();
+
+    auto tbl_cc = field_id_col("cc", 23, str_type);
+    auto tbl_dd = field_id_col("new_dd", 24, i32_type);
+    auto tbl_b = struct_col("b", 20, {tbl_cc, tbl_dd});
+    auto tbl_c = field_id_col("c", 25, str_type);
+    auto tbl_full = struct_col("struct_column2", 19, {tbl_b, tbl_c});
+    auto tbl_projected = struct_col("struct_column2", 19, {tbl_c});
+
+    auto phys_cc = field_id_col("cc", 23, str_type, 0);
+    auto phys_dd = field_id_col("new_dd", 24, i32_type, 1);
+    auto phys_b = struct_col("b", 20, {phys_cc, phys_dd}, 0);
+    auto phys_c = field_id_col("c", 25, str_type, 1);
+    auto phys_struct = struct_col("new_struct_column", 19, {phys_b, phys_c}, 10);
+
+    // The filter walks the full table struct even though only `c` is projected for output.
+    const auto root_slot = table_slot(0, 0, tbl_full.type, "struct_column2");
+    const auto b_access = element_at(root_slot, tbl_b.type, "b");
+    const auto cc_access = element_at(b_access, str_type, "cc");
+    auto like_filter = like_expr(cc_access, "NestedC%");
+    TableFilter filter {.conjunct = VExprContext::create_shared(like_filter),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({tbl_projected}, {}, {phys_struct}).ok());
+
+    FileScanRequest scan_request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {tbl_projected}, &scan_request).ok());
+    ASSERT_EQ(scan_request.conjuncts.size(), 1);
+    ASSERT_EQ(scan_request.predicate_columns.size(), 1);
+    EXPECT_EQ(scan_request.predicate_columns[0].column_id(), LocalColumnId(10));
+
+    const auto& cc_call = scan_request.conjuncts[0]->root()->children()[0];
+    ASSERT_EQ(cc_call->expr_name(), "element_at");
+    const auto& b_call = cc_call->children()[0];
+    ASSERT_EQ(b_call->expr_name(), "element_at");
+
+    const auto* file_slot = assert_cast<const VSlotRef*>(b_call->children()[0].get());
+    EXPECT_EQ(file_slot->column_name(), "new_struct_column");
+    // predicate_columns keeps the file column id (10), but the localized conjunct runs on the
+    // file-reader Block, so the VSlotRef column id is the block position of `new_struct_column`
+    // in this request.
+    EXPECT_EQ(file_slot->column_id(), 0);
+
+    const auto b_type = remove_nullable(b_call->data_type());
+    const auto* b_struct = assert_cast<const DataTypeStruct*>(b_type.get());
+    ASSERT_EQ(b_struct->get_elements().size(), 1);
+    EXPECT_EQ(b_struct->get_element_name(0), "cc");
+}
+
+// Scenario: a struct child reached through a computed parent (map_values + array element) is not
+// localized as a file conjunct, because the projected value struct may reorder its children.
+TEST(ColumnMapperScanRequestTest, MapValuesStructChildConjunctStaysTableLevel) {
+    const auto key_type = str();
+    const auto str_type = str();
+    const auto i32_type = i32();
+
+    auto tbl_gender = field_id_col("gender", 17, str_type);
+    auto tbl_full_name = field_id_col("full_name", 7, str_type);
+    auto tbl_value = struct_col("value", 6, {tbl_gender, tbl_full_name});
+    auto tbl_map = map_col("new_map_column", 2, {tbl_value}, key_type, tbl_value.type);
+
+    auto phys_key = field_id_col("key", 5, key_type, 0);
+    auto phys_age = field_id_col("age", 8, i32_type, 0);
+    auto phys_full_name = field_id_col("full_name", 7, str_type, 1);
+    auto phys_gender = field_id_col("gender", 17, str_type, 2);
+    auto phys_value = struct_col("value", 6, {phys_age, phys_full_name, phys_gender}, 1);
+    auto phys_map =
+            map_col("new_map_column", 2, {phys_key, phys_value}, key_type, phys_value.type, 1);
+
+    const auto map_ref = table_slot(0, 0, tbl_map.type, "new_map_column");
+    const auto all_values = map_values(map_ref, tbl_value.type);
+    const auto head_value = array_element_at(all_values, tbl_value.type, 1);
+    const auto name_access = element_at(head_value, str_type, "full_name");
+    auto like_filter = like_expr(name_access, "J%");
+    TableFilter filter {.conjunct = VExprContext::create_shared(like_filter),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({tbl_map}, {}, {phys_map}).ok());
+
+    FileScanRequest scan_request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {tbl_map}, &scan_request).ok());
+
+    EXPECT_TRUE(scan_request.conjuncts.empty());
+    ASSERT_EQ(scan_request.predicate_columns.size(), 1);
+    EXPECT_EQ(scan_request.predicate_columns[0].column_id(), LocalColumnId(1));
+    ASSERT_FALSE(scan_request.predicate_columns[0].project_all_children);
+    ASSERT_EQ(scan_request.predicate_columns[0].children.size(), 1);
+    EXPECT_EQ(scan_request.predicate_columns[0].children[0].local_id(), 1);
+    EXPECT_TRUE(scan_request.column_predicate_filters.empty());
+}
+
+// Scenario: MAP_KEYS reads only the keys, yet localizing it through CAST(file_map AS table_map)
+// on the evolved file map slot would also cast the old value struct to the new one. The conjunct
+// therefore stays table-level when the map value schema changed.
+TEST(ColumnMapperScanRequestTest, MapKeysConjunctWithEvolvedValueStructStaysTableLevel) {
+    const auto key_type = str();
+    const auto str_type = str();
+    const auto i32_type = i32();
+
+    auto tbl_age = field_id_col("age", 8, i32_type);
+    auto tbl_full_name = field_id_col("full_name", 7, str_type);
+    auto tbl_gender = field_id_col("gender", 17, str_type);
+    auto tbl_value = struct_col("value", 6, {tbl_age, tbl_full_name, tbl_gender});
+    auto tbl_key = field_id_col("key", 5, key_type);
+    auto tbl_map = map_col("new_map_column", 2, {tbl_key, tbl_value}, key_type, tbl_value.type);
+
+    auto phys_key = field_id_col("key", 5, key_type, 0);
+    auto phys_name = field_id_col("name", 18, str_type, 0);
+    auto phys_age = field_id_col("age", 8, i32_type, 1);
+    auto phys_value = struct_col("value", 6, {phys_name, phys_age}, 1);
+    auto phys_map = map_col("map_column", 2, {phys_key, phys_value}, key_type, phys_value.type, 1);
+
+    // Table predicate: array_contains(map_keys(new_map_column), 'person5').
+    const auto map_ref = table_slot(0, 0, tbl_map.type, "new_map_column");
+    const auto key_list = map_keys(map_ref, key_type);
+    const auto needle = literal(key_type, Field::create_field<TYPE_STRING>("person5"));
+    auto contains_needle = array_contains(key_list, needle);
+    TableFilter filter {.conjunct = VExprContext::create_shared(contains_needle),
+                        .global_indices = {GlobalIndex(0)}};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({tbl_map}, {}, {phys_map}).ok());
+
+    FileScanRequest scan_request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, {tbl_map}, &scan_request).ok());
+
+    EXPECT_TRUE(scan_request.conjuncts.empty());
+    EXPECT_TRUE(scan_request.non_predicate_columns.empty());
+    ASSERT_EQ(scan_request.predicate_columns.size(), 1);
+    EXPECT_EQ(scan_request.predicate_columns[0].column_id(), LocalColumnId(1));
+    EXPECT_TRUE(scan_request.column_predicate_filters.empty());
+}
+
+// Scenario: the only requested child of an array element struct is missing from the file, so the
+// mapper projects the full physical element and the reader never sees an empty projection.
+TEST(ColumnMapperScanRequestTest, ArrayStructOnlyMissingElementChildUsesFullFileProjection) {
+    const auto i32_type = i32();
+    const auto str_type = str();
+
+    auto phys_a = field_id_col("a", 1, i32_type, 0);
+    auto phys_b = field_id_col("b", 2, i32_type, 1);
+    auto phys_element = struct_col("element", 0, {phys_a, phys_b}, 0);
+    auto phys_array = array_col("xs", 10, phys_element, 10);
+
+    auto absent_child = field_id_col("missing_child", 99, str_type);
+    auto tbl_element = struct_col("element", 0, {absent_child});
+    auto tbl_array = array_col("xs", 10, tbl_element);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({tbl_array}, {}, {phys_array}).ok());
+    FileScanRequest scan_request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {tbl_array}, &scan_request).ok());
+
+    ASSERT_EQ(scan_request.non_predicate_columns.size(), 1);
+    const auto& array_projection = scan_request.non_predicate_columns[0];
+    EXPECT_EQ(array_projection.column_id(), LocalColumnId(10));
+    EXPECT_TRUE(array_projection.project_all_children);
+    EXPECT_TRUE(array_projection.children.empty());
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_FALSE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: the only requested child of a map value struct is missing from the file. The mapper
+// keeps the map key/value shape and reads the full physical value struct, not an empty child.
+TEST(ColumnMapperScanRequestTest, MapValueStructOnlyMissingChildUsesFullValueProjection) {
+    const auto key_type = i32();
+    const auto i32_type = i32();
+    const auto str_type = str();
+
+    auto phys_key = field_id_col("key", 0, key_type, 0);
+    auto phys_a = field_id_col("a", 1, i32_type, 0);
+    auto phys_b = field_id_col("b", 2, i32_type, 1);
+    auto phys_value = struct_col("value", 1, {phys_a, phys_b}, 1);
+    auto phys_map = map_col("m", 10, {phys_key, phys_value}, key_type, phys_value.type, 10);
+
+    auto absent_child = field_id_col("missing_child", 99, str_type);
+    auto tbl_value = struct_col("value", 1, {absent_child});
+    auto tbl_map = map_col("m", 10, {tbl_value}, key_type, tbl_value.type);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({tbl_map}, {}, {phys_map}).ok());
+    FileScanRequest scan_request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {tbl_map}, &scan_request).ok());
+
+    ASSERT_EQ(scan_request.non_predicate_columns.size(), 1);
+    const auto& map_projection = scan_request.non_predicate_columns[0];
+    EXPECT_EQ(map_projection.column_id(), LocalColumnId(10));
+    ASSERT_FALSE(map_projection.project_all_children);
+    ASSERT_EQ(map_projection.children.size(), 1);
+    const auto& value_projection = map_projection.children[0];
+    EXPECT_EQ(value_projection.local_id(), 1);
+    EXPECT_TRUE(value_projection.project_all_children);
+    EXPECT_TRUE(value_projection.children.empty());
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_FALSE(mapper.mappings()[0].is_trivial);
+}
+
+// ----------------------------------------------------------------------
+// L1 complex schema evolution and split isolation.
+// These tests call the mapper repeatedly with different file schemas and
+// verify that split-local state is rebuilt instead of leaked.
+// ----------------------------------------------------------------------
+
+// Scenario: one table struct {a, renamed_b, c} is mapped by field id onto two file versions.
+// Ids are read via value_or(-1): an unmapped child then fails the check instead of invoking UB.
+TEST(ColumnMapperSchemaEvolutionTest, StructChildrenHandleMissingRenameReorderAndDroppedFields) {
+    const auto int_type = i32();
+    const auto string_type = str();
+    auto table_a = field_id_col("a", 1, int_type);
+    auto table_renamed_b = field_id_col("renamed_b", 2, string_type);
+    auto table_c = field_id_col("c", 3, int_type);
+    auto table_struct = struct_col("s", 10, {table_a, table_renamed_b, table_c});
+    auto v1_a = field_id_col("a", 1, int_type, 0);
+    auto v1_b = field_id_col("b", 2, string_type, 1);
+    auto file_v1 = struct_col("s", 10, {v1_a, v1_b}, 5);
+    auto v2_b = field_id_col("b", 2, string_type, 0);
+    auto v2_a = field_id_col("a", 1, int_type, 1);
+    auto v2_c = field_id_col("c", 3, int_type, 2);
+    auto file_v2 = struct_col("s", 10, {v2_b, v2_a, v2_c}, 8);
+
+    TableColumnMapper v1_mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(v1_mapper.create_mapping({table_struct}, {}, {file_v1}).ok());
+    FileScanRequest v1_request;
+    ASSERT_TRUE(v1_mapper.create_scan_request({}, {}, {table_struct}, &v1_request).ok());
+    ASSERT_EQ(v1_mapper.mappings().size(), 1);
+    const auto& v1_mapping = v1_mapper.mappings()[0];
+    ASSERT_EQ(v1_mapping.child_mappings.size(), 3);
+    EXPECT_EQ(v1_mapping.child_mappings[0].file_local_id.value_or(-1), 0);
+    EXPECT_EQ(v1_mapping.child_mappings[1].file_local_id.value_or(-1), 1);
+    EXPECT_FALSE(v1_mapping.child_mappings[2].file_local_id.has_value());
+    ASSERT_EQ(v1_request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(v1_request.non_predicate_columns[0].column_id(), LocalColumnId(5));
+    EXPECT_TRUE(v1_request.non_predicate_columns[0].project_all_children);
+
+    TableColumnMapper v2_mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(v2_mapper.create_mapping({table_struct}, {}, {file_v2}).ok());
+    FileScanRequest v2_request;
+    ASSERT_TRUE(v2_mapper.create_scan_request({}, {}, {table_struct}, &v2_request).ok());
+    ASSERT_EQ(v2_mapper.mappings().size(), 1);
+    const auto& v2_mapping = v2_mapper.mappings()[0];
+    ASSERT_EQ(v2_mapping.child_mappings.size(), 3);
+    EXPECT_EQ(v2_mapping.child_mappings[0].file_local_id.value_or(-1), 1);
+    EXPECT_EQ(v2_mapping.child_mappings[1].file_local_id.value_or(-1), 0);
+    EXPECT_EQ(v2_mapping.child_mappings[2].file_local_id.value_or(-1), 2);
+    ASSERT_EQ(v2_request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(v2_request.non_predicate_columns[0].column_id(), LocalColumnId(8));
+    EXPECT_TRUE(v2_request.non_predicate_columns[0].project_all_children);
+}
+
+// Scenario: file struct children absent from the table schema are excluded from the projection.
+TEST(ColumnMapperSchemaEvolutionTest, DroppedStructChildrenAreNotRead) {
+    const auto i32_type = i32();
+    const auto str_type = str();
+    auto kept_child = field_id_col("a", 1, i32_type);
+    auto tbl_struct = struct_col("s", 10, {kept_child});
+
+    auto phys_a = field_id_col("a", 1, i32_type, 0);
+    auto phys_b = field_id_col("b", 2, str_type, 1);
+    auto phys_c = field_id_col("c", 3, i32_type, 2);
+    auto phys_struct = struct_col("s", 10, {phys_a, phys_b, phys_c}, 5);
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_FIELD_ID});
+    ASSERT_TRUE(mapper.create_mapping({tbl_struct}, {}, {phys_struct}).ok());
+    FileScanRequest scan_request;
+    ASSERT_TRUE(mapper.create_scan_request({}, {}, {tbl_struct}, &scan_request).ok());
+
+    ASSERT_EQ(scan_request.non_predicate_columns.size(), 1);
+    const auto& struct_projection = scan_request.non_predicate_columns[0];
+    EXPECT_EQ(struct_projection.column_id(), LocalColumnId(5));
+    ASSERT_FALSE(struct_projection.project_all_children);
+    EXPECT_EQ(projection_ids(struct_projection.children), std::vector<int32_t>({0}));
+}
+
+// Scenario: one mapper serves two splits in a row; split-1 constants and file ids must not leak.
+TEST(ColumnMapperSchemaEvolutionTest, ReusedMapperClearsSplitLocalConstantsAndFileIds) {
+    const auto int_type = i32();
+    auto id = name_col("id", int_type);
+    auto added = name_col("added", int_type);
+    added.default_expr =
+            VExprContext::create_shared(literal(int_type, Field::create_field<TYPE_INT>(7)));
+    const std::vector<ColumnDefinition> table_schema = {id, added};
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {}, {name_col("id", int_type, 0)}).ok());
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    EXPECT_EQ(mapper.mappings()[0].file_local_id.value_or(-1), 0);
+    expect_constant(mapper, mapper.mappings()[1], 1, int_type);
+    // Second split, same mapper. value_or(-1) turns an unmapped column into a plain failure.
+    ASSERT_TRUE(mapper.create_mapping(table_schema, {},
+                                      {name_col("id", int_type, 3), name_col("added", int_type, 4)})
+                        .ok());
+    ASSERT_EQ(mapper.mappings().size(), 2);
+    EXPECT_EQ(mapper.mappings()[0].file_local_id.value_or(-1), 3);
+    EXPECT_EQ(mapper.mappings()[1].file_local_id.value_or(-1), 4);
+    EXPECT_TRUE(mapper.constant_map().empty());
+}
+
+// ----------------------------------------------------------------------
+// L2 cast-aware filter localization tests.
+// These tests belong to TableColumnMapper rather than Cast: they assert when the mapper builds
+// projection casts, rewrites table predicates to file-local slot casts, converts literals to the
+// current split's file type, and keeps repeated scan-request rewrites idempotent.
+// ----------------------------------------------------------------------
+
+// Scenario: the table column is i64 while the file column is i32, so the visible mapping is
+// non-trivial and must carry a projection that casts file values to the table type.
+TEST_F(ColumnMapperCastTest, ColumnMapperBuildsCastProjectionForTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> physical_schema {file_col};
+
+    auto st = mapper.create_mapping(projection, {}, physical_schema);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    FileScanRequest scan_request;
+    st = mapper.create_scan_request({}, {}, projection, &scan_request);
+    ASSERT_TRUE(st.ok()) << st;
+    const auto& mapping = mapper.mappings()[0];
+    EXPECT_FALSE(mapping.is_trivial);
+    ASSERT_NE(mapping.projection, nullptr);
+
+    // Run the projection over an Int32 block and read the result back as an Int64 column.
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 22}));
+    int out_pos = -1;
+    st = prepare_open_execute(mapping.projection.get(), &block, &out_pos);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& widened = assert_cast<const ColumnInt64&>(*block.get_by_position(out_pos).column);
+    EXPECT_EQ(widened.get_data()[0], 11);
+    EXPECT_EQ(widened.get_data()[1], 22);
+
+    mapping.projection->close();
+}
+
+// Scenario: identical table/file types keep the mapping trivial; no projection cast is needed.
+TEST_F(ColumnMapperCastTest, ColumnMapperTreatsEquivalentTypesAsTrivial) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_col = name_col("value", i32());
+    auto file_col = name_col("value", i32(), 0);
+    const std::vector<ColumnDefinition> projection {table_col};
+    const std::vector<ColumnDefinition> physical_schema {file_col};
+
+    // Both sides are built from i32(), so the mapping is expected to be marked trivial.
+    const auto st = mapper.create_mapping(projection, {}, physical_schema);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(mapper.mappings().size(), 1);
+    EXPECT_TRUE(mapper.mappings()[0].is_trivial);
+}
+
+// Scenario: a predicate on the widened table type is localized by wrapping the narrower file
+// slot in a cast to the table type; the rewritten conjunct is then executed on file-typed data.
+TEST_F(ColumnMapperCastTest, ColumnMapperBuildsCastFilterForTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> physical_schema {file_col};
+
+    auto st = mapper.create_mapping(projection, {}, physical_schema);
+    ASSERT_TRUE(st.ok()) << st;
+
+    auto gt_predicate = std::make_shared<Int64ChildGreaterThanExpr>(15);
+    gt_predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    TableFilter table_filter {.conjunct = VExprContext::create_shared(gt_predicate),
+                              .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest scan;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projection, &scan, &state).ok());
+    ASSERT_EQ(scan.conjuncts.size(), 1);
+    ASSERT_EQ(projection_ids(scan.predicate_columns), std::vector<int32_t>({0}));
+    // Expected tree: predicate -> Cast (table type) -> file slot (file type).
+    const auto& root = scan.conjuncts[0]->root();
+    ASSERT_EQ(root->get_num_children(), 1);
+    const auto& cast_node = root->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(cast_node.get()), nullptr);
+    ASSERT_EQ(cast_node->get_num_children(), 1);
+    const auto* file_slot = assert_cast<const VSlotRef*>(cast_node->children()[0].get());
+    EXPECT_EQ(file_slot->column_id(), 0);
+    EXPECT_TRUE(file_slot->data_type()->equals(*file_col.type));
+    EXPECT_TRUE(cast_node->data_type()->equals(*table_col.type));
+
+    // Execute the localized conjunct on Int32 rows {11, 22}; the expected keep-mask is {0, 1}.
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 22}));
+    auto* conjunct = scan.conjuncts[0].get();
+    st = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(st.ok()) << st;
+    st = conjunct->open(&state);
+    ASSERT_TRUE(st.ok()) << st;
+    IColumn::Filter row_mask(block.rows(), 1);
+    bool all_filtered = false;
+    st = conjunct->execute_filter(&block, row_mask.data(), block.rows(), false, &all_filtered);
+    ASSERT_TRUE(st.ok()) << st;
+    EXPECT_FALSE(all_filtered);
+    ASSERT_EQ(row_mask.size(), 2);
+    EXPECT_EQ(row_mask[0], 0);
+    EXPECT_EQ(row_mask[1], 1);
+
+    conjunct->close();
+}
+
+// Scenario: a table filter that was already prepared and opened can still be cloned, rewritten
+// into a file-local filter, and then prepared and opened again.
+TEST_F(ColumnMapperCastTest, ColumnMapperRepreparesRewrittenPreparedFilter) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> physical_schema {file_col};
+
+    auto st = mapper.create_mapping(projection, {}, physical_schema);
+    ASSERT_TRUE(st.ok()) << st;
+
+    auto table_cast = Cast::create_shared(table_col.type);
+    table_cast->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    TableFilter table_filter {.conjunct = VExprContext::create_shared(table_cast),
+                              .global_indices = {GlobalIndex(0)}};
+    // Prepare and open the table-level filter before handing it to the mapper.
+    st = table_filter.conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(st.ok()) << st;
+    st = table_filter.conjunct->open(&state);
+    ASSERT_TRUE(st.ok()) << st;
+
+    FileScanRequest scan;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projection, &scan, &state).ok());
+    ASSERT_EQ(scan.conjuncts.size(), 1);
+    const auto& root = scan.conjuncts[0]->root();
+    ASSERT_NE(dynamic_cast<const Cast*>(root.get()), nullptr);
+    ASSERT_EQ(root->get_num_children(), 1);
+    const auto* file_slot = assert_cast<const VSlotRef*>(root->children()[0].get());
+    EXPECT_EQ(file_slot->column_id(), 0);
+    EXPECT_TRUE(file_slot->data_type()->equals(*file_col.type));
+
+    auto* localized = scan.conjuncts[0].get();
+    st = localized->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(st.ok()) << st;
+    st = localized->open(&state);
+    ASSERT_TRUE(st.ok()) << st;
+
+    localized->close();
+}
+
+// Scenario: in a slot-vs-literal comparison with mismatched types, the literal is rewritten to
+// the current file type when that conversion is safe, and the slot keeps its file type.
+TEST_F(ColumnMapperCastTest, ColumnMapperCastsLiteralForSlotLiteralPredicateTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> physical_schema {file_col};
+
+    auto st = mapper.create_mapping(projection, {}, physical_schema);
+    ASSERT_TRUE(st.ok()) << st;
+
+    auto gt_predicate = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::GT);
+    gt_predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    gt_predicate->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_BIGINT>(15)));
+    TableFilter table_filter {.conjunct = VExprContext::create_shared(gt_predicate),
+                              .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest scan;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projection, &scan, &state).ok());
+    ASSERT_EQ(scan.conjuncts.size(), 1);
+    ASSERT_EQ(projection_ids(scan.predicate_columns), std::vector<int32_t>({0}));
+    // Expected children: [file slot, literal], both typed as the file column.
+    const auto& root = scan.conjuncts[0]->root();
+    ASSERT_EQ(root->get_num_children(), 2);
+    const auto* file_slot = assert_cast<const VSlotRef*>(root->children()[0].get());
+    EXPECT_EQ(file_slot->column_id(), 0);
+    EXPECT_TRUE(file_slot->data_type()->equals(*file_col.type));
+    const auto& file_literal = root->children()[1];
+    EXPECT_TRUE(file_literal->is_literal());
+    EXPECT_TRUE(file_literal->data_type()->equals(*file_col.type));
+
+    // Execute the localized conjunct on Int32 rows {11, 22}; the expected keep-mask is {0, 1}.
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 22}));
+    auto* conjunct = scan.conjuncts[0].get();
+    st = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(st.ok()) << st;
+    st = conjunct->open(&state);
+    ASSERT_TRUE(st.ok()) << st;
+    IColumn::Filter row_mask(block.rows(), 1);
+    bool all_filtered = false;
+    st = conjunct->execute_filter(&block, row_mask.data(), block.rows(), false, &all_filtered);
+    ASSERT_TRUE(st.ok()) << st;
+    EXPECT_FALSE(all_filtered);
+    ASSERT_EQ(row_mask.size(), 2);
+    EXPECT_EQ(row_mask[0], 0);
+    EXPECT_EQ(row_mask[1], 1);
+
+    conjunct->close();
+}
+
+// Scenario: the literal-vs-slot form is handled the same way: the literal side is rewritten to
+// the file type and the operand order is preserved.
+TEST_F(ColumnMapperCastTest, ColumnMapperCastsLiteralForLiteralSlotPredicateTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> physical_schema {file_col};
+
+    auto st = mapper.create_mapping(projection, {}, physical_schema);
+    ASSERT_TRUE(st.ok()) << st;
+
+    auto lt_predicate = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::LT);
+    lt_predicate->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_BIGINT>(15)));
+    lt_predicate->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    TableFilter table_filter {.conjunct = VExprContext::create_shared(lt_predicate),
+                              .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest scan;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projection, &scan, &state).ok());
+    ASSERT_EQ(scan.conjuncts.size(), 1);
+    // Expected children: [literal, file slot], both typed as the file column.
+    const auto& root = scan.conjuncts[0]->root();
+    ASSERT_EQ(root->get_num_children(), 2);
+    const auto& file_literal = root->children()[0];
+    EXPECT_TRUE(file_literal->is_literal());
+    EXPECT_TRUE(file_literal->data_type()->equals(*file_col.type));
+    const auto* file_slot = assert_cast<const VSlotRef*>(root->children()[1].get());
+    EXPECT_EQ(file_slot->column_id(), 0);
+    EXPECT_TRUE(file_slot->data_type()->equals(*file_col.type));
+
+    // Execute the localized conjunct on Int32 rows {11, 22}; the expected keep-mask is {0, 1}.
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({11, 22}));
+    auto* conjunct = scan.conjuncts[0].get();
+    st = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(st.ok()) << st;
+    st = conjunct->open(&state);
+    ASSERT_TRUE(st.ok()) << st;
+    IColumn::Filter row_mask(block.rows(), 1);
+    bool all_filtered = false;
+    st = conjunct->execute_filter(&block, row_mask.data(), block.rows(), false, &all_filtered);
+    ASSERT_TRUE(st.ok()) << st;
+    EXPECT_FALSE(all_filtered);
+    ASSERT_EQ(row_mask.size(), 2);
+    EXPECT_EQ(row_mask[0], 0);
+    EXPECT_EQ(row_mask[1], 1);
+
+    conjunct->close();
+}
+
+// Scenario: when every IN-list literal converts safely, all literals are rewritten to the file
+// type and the slot is read in its file type.
+TEST_F(ColumnMapperCastTest, ColumnMapperCastsInPredicateLiteralsForTypeMismatch) {
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> physical_schema {file_col};
+
+    auto st = mapper.create_mapping(projection, {}, physical_schema);
+    ASSERT_TRUE(st.ok()) << st;
+
+    auto in_list = create_in_predicate();
+    in_list->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    in_list->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_BIGINT>(15)));
+    in_list->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_BIGINT>(22)));
+    TableFilter table_filter {.conjunct = VExprContext::create_shared(in_list),
+                              .global_indices = {GlobalIndex(0)}};
+
+    FileScanRequest scan;
+    ASSERT_TRUE(mapper.create_scan_request({table_filter}, {}, projection, &scan, &state).ok());
+    ASSERT_EQ(scan.conjuncts.size(), 1);
+    ASSERT_EQ(projection_ids(scan.predicate_columns), std::vector<int32_t>({0}));
+
+    // Expected children: [file slot, literal, literal], all typed as the file column.
+    const auto& root = scan.conjuncts[0]->root();
+    ASSERT_EQ(root->get_num_children(), 3);
+    const auto* file_slot = assert_cast<const VSlotRef*>(root->children()[0].get());
+    EXPECT_EQ(file_slot->column_id(), 0);
+    EXPECT_TRUE(file_slot->data_type()->equals(*file_col.type));
+    EXPECT_TRUE(root->children()[1]->is_literal());
+    EXPECT_TRUE(root->children()[1]->data_type()->equals(*file_col.type));
+    EXPECT_TRUE(root->children()[2]->is_literal());
+    EXPECT_TRUE(root->children()[2]->data_type()->equals(*file_col.type));
+}
+
+// Scenario: one unconvertible IN-list literal makes the mapper cast the file slot instead.
+TEST_F(ColumnMapperCastTest, ColumnMapperFallsBackToSlotCastWhenInPredicateLiteralRewriteFails) {
+    // STRING table column over an INT file column; "bad" is the literal expected to fail.
+    auto table_col = name_col("value", str());
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> projection {table_col};
+    std::vector<ColumnDefinition> schema {file_col};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto mapping_status = mapper.create_mapping(projection, {}, schema);
+    ASSERT_TRUE(mapping_status.ok()) << mapping_status;
+
+    auto in_pred = create_in_predicate();
+    in_pred->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    in_pred->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_STRING>("10")));
+    in_pred->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_STRING>("bad")));
+    TableFilter filter;
+    filter.conjunct = VExprContext::create_shared(in_pred);
+    filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, projection, &request, &state).ok());
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    const auto& root = request.conjuncts[0]->root();
+    ASSERT_EQ(root->get_num_children(), 3);
+    // The slot side must be Cast(file slot) producing the table type.
+    const auto& cast_node = root->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(cast_node.get()), nullptr);
+    ASSERT_EQ(cast_node->get_num_children(), 1);
+    const auto* slot = assert_cast<const VSlotRef*>(cast_node->children()[0].get());
+    EXPECT_EQ(slot->column_id(), 0);
+    EXPECT_TRUE(slot->data_type()->equals(*file_col.type));
+    EXPECT_TRUE(cast_node->data_type()->equals(*table_col.type));
+    // Both IN-list entries stay literals of the table type.
+    for (size_t i = 1; i <= 2; ++i) {
+        EXPECT_TRUE(root->children()[i]->is_literal());
+        EXPECT_TRUE(root->children()[i]->data_type()->equals(*table_col.type));
+    }
+}
+
+// Scenario: IN-literal rewrites are per split and never modify the shared table filter.
+TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotLeakRewrittenInPredicateLiteralAcrossSplits) {
+    // The table column is BIGINT for both splits; only the file type differs.
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+
+    // Shared table-level filter: an IN-list holding the BIGINT literals 15 and 22.
+    auto in_pred = create_in_predicate();
+    in_pred->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    in_pred->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_BIGINT>(15)));
+    in_pred->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_BIGINT>(22)));
+    TableFilter filter;
+    filter.conjunct = VExprContext::create_shared(in_pred);
+    filter.global_indices = {GlobalIndex(0)};
+
+    // Split 1: the file stores INT, so the localized literals become INT.
+    auto int_col = name_col("value", i32(), 0);
+    TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(int_mapper.create_mapping(projection, {}, {int_col}).ok());
+    FileScanRequest int_req;
+    ASSERT_TRUE(int_mapper.create_scan_request({filter}, {}, projection, &int_req, &state).ok());
+    ASSERT_EQ(int_req.conjuncts.size(), 1);
+
+    const auto& int_root = int_req.conjuncts[0]->root();
+    ASSERT_EQ(int_root->get_num_children(), 3);
+    for (size_t i = 1; i <= 2; ++i) {
+        EXPECT_TRUE(int_root->children()[i]->is_literal());
+        EXPECT_TRUE(int_root->children()[i]->data_type()->equals(*int_col.type));
+    }
+
+    // Split 2: the file stores BIGINT, so slot and literals must still be BIGINT.
+    auto i64_col = name_col("value", i64(), 0);
+    TableColumnMapper i64_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(i64_mapper.create_mapping(projection, {}, {i64_col}).ok());
+    FileScanRequest i64_req;
+    ASSERT_TRUE(i64_mapper.create_scan_request({filter}, {}, projection, &i64_req, &state).ok());
+    ASSERT_EQ(i64_req.conjuncts.size(), 1);
+
+    const auto& i64_root = i64_req.conjuncts[0]->root();
+    ASSERT_EQ(i64_root->get_num_children(), 3);
+    // The types match here, so child 0 is expected to be the plain slot.
+    const auto* slot = assert_cast<const VSlotRef*>(i64_root->children()[0].get());
+    EXPECT_EQ(slot->column_id(), 0);
+    EXPECT_TRUE(slot->data_type()->equals(*i64_col.type));
+    for (size_t i = 1; i <= 2; ++i) {
+        EXPECT_TRUE(i64_root->children()[i]->is_literal());
+        EXPECT_TRUE(i64_root->children()[i]->data_type()->equals(*i64_col.type));
+    }
+}
+
+// Scenario: a binary predicate whose literal cannot be converted casts the file slot instead.
+TEST_F(ColumnMapperCastTest, ColumnMapperFallsBackToSlotCastWhenLiteralRewriteFails) {
+    // STRING table column over an INT file column; "bad" is the literal expected to fail.
+    auto table_col = name_col("value", str());
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> projection {table_col};
+    std::vector<ColumnDefinition> schema {file_col};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto mapping_status = mapper.create_mapping(projection, {}, schema);
+    ASSERT_TRUE(mapping_status.ok()) << mapping_status;
+
+    auto gt_pred = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::GT);
+    gt_pred->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    gt_pred->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_STRING>("bad")));
+    TableFilter filter;
+    filter.conjunct = VExprContext::create_shared(gt_pred);
+    filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, projection, &request, &state).ok());
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    const auto& root = request.conjuncts[0]->root();
+    ASSERT_EQ(root->get_num_children(), 2);
+    // The slot side must be Cast(file slot) producing the table type.
+    const auto& cast_node = root->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(cast_node.get()), nullptr);
+    ASSERT_EQ(cast_node->get_num_children(), 1);
+    const auto* slot = assert_cast<const VSlotRef*>(cast_node->children()[0].get());
+    EXPECT_EQ(slot->column_id(), 0);
+    EXPECT_TRUE(slot->data_type()->equals(*file_col.type));
+    EXPECT_TRUE(cast_node->data_type()->equals(*table_col.type));
+    const auto& literal = root->children()[1];
+    EXPECT_TRUE(literal->is_literal());
+    EXPECT_TRUE(literal->data_type()->equals(*table_col.type));
+}
+
+// Scenario: a literal rewritten for one split does not show up in a later split of another type.
+TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotLeakRewrittenLiteralAcrossSplits) {
+    // The table column is BIGINT for both splits; only the file type differs.
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+
+    // Shared table-level filter: a GT predicate against the BIGINT literal 15.
+    auto gt_pred = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::GT);
+    gt_pred->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    gt_pred->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_BIGINT>(15)));
+    TableFilter filter;
+    filter.conjunct = VExprContext::create_shared(gt_pred);
+    filter.global_indices = {GlobalIndex(0)};
+
+    // Split 1: the file stores INT, so the localized literal becomes INT.
+    auto int_col = name_col("value", i32(), 0);
+    TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(int_mapper.create_mapping(projection, {}, {int_col}).ok());
+    FileScanRequest int_req;
+    ASSERT_TRUE(int_mapper.create_scan_request({filter}, {}, projection, &int_req, &state).ok());
+    ASSERT_EQ(int_req.conjuncts.size(), 1);
+
+    const auto& int_root = int_req.conjuncts[0]->root();
+    ASSERT_EQ(int_root->get_num_children(), 2);
+    EXPECT_TRUE(int_root->children()[1]->is_literal());
+    EXPECT_TRUE(int_root->children()[1]->data_type()->equals(*int_col.type));
+
+    // Split 2: the file stores BIGINT, so slot and literal must still be BIGINT.
+    auto i64_col = name_col("value", i64(), 0);
+    TableColumnMapper i64_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(i64_mapper.create_mapping(projection, {}, {i64_col}).ok());
+    FileScanRequest i64_req;
+    ASSERT_TRUE(i64_mapper.create_scan_request({filter}, {}, projection, &i64_req, &state).ok());
+    ASSERT_EQ(i64_req.conjuncts.size(), 1);
+
+    const auto& i64_root = i64_req.conjuncts[0]->root();
+    ASSERT_EQ(i64_root->get_num_children(), 2);
+    // The types match here, so child 0 is expected to be the plain slot.
+    const auto* slot = assert_cast<const VSlotRef*>(i64_root->children()[0].get());
+    EXPECT_EQ(slot->column_id(), 0);
+    EXPECT_TRUE(slot->data_type()->equals(*i64_col.type));
+    EXPECT_TRUE(i64_root->children()[1]->is_literal());
+    EXPECT_TRUE(i64_root->children()[1]->data_type()->equals(*i64_col.type));
+}
+
+// Scenario: a cast written in the predicate is kept, and the slot beneath it is localized.
+TEST_F(ColumnMapperCastTest, ColumnMapperKeepsExplicitSlotCastInSlotLiteralPredicate) {
+    auto table_col = name_col("value", i64());
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> projection {table_col};
+    std::vector<ColumnDefinition> schema {file_col};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto mapping_status = mapper.create_mapping(projection, {}, schema);
+    ASSERT_TRUE(mapping_status.ok()) << mapping_status;
+
+    // Predicate shape: Cast<String>(table slot) compared against the BIGINT literal 15.
+    auto user_cast = Cast::create_shared(std::make_shared<DataTypeString>());
+    user_cast->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    auto gt_pred = std::make_shared<Int64BinaryPredicateExpr>(TExprOpcode::GT);
+    gt_pred->add_child(user_cast);
+    gt_pred->add_child(
+            VLiteral::create_shared(table_col.type, Field::create_field<TYPE_BIGINT>(15)));
+    TableFilter filter;
+    filter.conjunct = VExprContext::create_shared(gt_pred);
+    filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest request;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, projection, &request, &state).ok());
+    ASSERT_EQ(request.conjuncts.size(), 1);
+    const auto& root = request.conjuncts[0]->root();
+    ASSERT_EQ(root->get_num_children(), 2);
+    // Expected shape of child 0: Cast<String>(Cast(file slot)).
+    const auto& outer_cast = root->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(outer_cast.get()), nullptr);
+    EXPECT_TRUE(outer_cast->data_type()->equals(DataTypeString()));
+    ASSERT_EQ(outer_cast->get_num_children(), 1);
+    const auto& inner_cast = outer_cast->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(inner_cast.get()), nullptr);
+    ASSERT_EQ(inner_cast->get_num_children(), 1);
+    const auto* slot = assert_cast<const VSlotRef*>(inner_cast->children()[0].get());
+    EXPECT_EQ(slot->column_id(), 0);
+    EXPECT_TRUE(slot->data_type()->equals(*file_col.type));
+}
+
+// Scenario: building the scan request twice yields Cast(slot) both times, never Cast(Cast(slot)).
+TEST_F(ColumnMapperCastTest, ColumnMapperDoesNotNestCastFilterAcrossScanRequests) {
+    // BIGINT table column over an INT file column, so the slot side needs a cast.
+    auto table_col = name_col("value", i64());
+    auto file_col = name_col("value", i32(), 0);
+    std::vector<ColumnDefinition> projection {table_col};
+    std::vector<ColumnDefinition> schema {file_col};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    auto mapping_status = mapper.create_mapping(projection, {}, schema);
+    ASSERT_TRUE(mapping_status.ok()) << mapping_status;
+
+    auto gt_pred = std::make_shared<Int64ChildGreaterThanExpr>(15);
+    gt_pred->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    TableFilter filter;
+    filter.conjunct = VExprContext::create_shared(gt_pred);
+    filter.global_indices = {GlobalIndex(0)};
+
+    // Localize the same table filter twice through the same mapper.
+    FileScanRequest first_req;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, projection, &first_req, &state).ok());
+    FileScanRequest second_req;
+    ASSERT_TRUE(mapper.create_scan_request({filter}, {}, projection, &second_req, &state).ok());
+
+    // Only the second result is inspected: it must hold exactly one Cast above the slot.
+    ASSERT_EQ(second_req.conjuncts.size(), 1);
+
+    const auto& root = second_req.conjuncts[0]->root();
+    ASSERT_EQ(root->get_num_children(), 1);
+    const auto& cast_node = root->children()[0];
+    ASSERT_NE(dynamic_cast<const Cast*>(cast_node.get()), nullptr);
+    ASSERT_EQ(cast_node->get_num_children(), 1);
+    // The Cast's only child has to be the slot itself, not another Cast.
+    const auto* slot = assert_cast<const VSlotRef*>(cast_node->children()[0].get());
+    EXPECT_EQ(slot->column_id(), 0);
+}
+
+// Scenario: a Cast added for one split does not carry over to a split whose file type matches.
+TEST_F(ColumnMapperCastTest, ColumnMapperRewritesPreviousCastFilterToMatchingSplitType) {
+    // The table column stays BIGINT; the two splits differ only in the file type.
+    auto table_col = name_col("value", i64());
+    std::vector<ColumnDefinition> projection {table_col};
+
+    // Shared table-level filter built on the BIGINT table slot.
+    auto gt_pred = std::make_shared<Int64ChildGreaterThanExpr>(15);
+    gt_pred->add_child(VSlotRef::create_shared(0, 0, -1, table_col.type, "value"));
+    TableFilter filter;
+    filter.conjunct = VExprContext::create_shared(gt_pred);
+    filter.global_indices = {GlobalIndex(0)};
+
+    // Split 1: INT file column, so the localized slot is wrapped in a Cast.
+    auto int_col = name_col("value", i32(), 0);
+    TableColumnMapper int_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(int_mapper.create_mapping(projection, {}, {int_col}).ok());
+    FileScanRequest int_req;
+    ASSERT_TRUE(int_mapper.create_scan_request({filter}, {}, projection, &int_req, &state).ok());
+    // Guard the conjuncts[0] access below; indexing an empty vector is undefined behavior.
+    ASSERT_EQ(int_req.conjuncts.size(), 1);
+    const auto& int_root = int_req.conjuncts[0]->root();
+    ASSERT_EQ(int_root->get_num_children(), 1);
+    ASSERT_NE(dynamic_cast<const Cast*>(int_root->children()[0].get()), nullptr);
+
+    // Split 2: BIGINT file column matches the table type, so child 0 must be the plain slot.
+    auto i64_col = name_col("value", i64(), 0);
+    TableColumnMapper i64_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(i64_mapper.create_mapping(projection, {}, {i64_col}).ok());
+    FileScanRequest i64_req;
+    ASSERT_TRUE(i64_mapper.create_scan_request({filter}, {}, projection, &i64_req, &state).ok());
+    // Same guard as for the first split.
+    ASSERT_EQ(i64_req.conjuncts.size(), 1);
+    const auto& i64_root = i64_req.conjuncts[0]->root();
+    ASSERT_EQ(i64_root->get_num_children(), 1);
+    const auto& i64_child = i64_root->children()[0];
+    const auto* slot = assert_cast<const VSlotRef*>(i64_child.get());
+    EXPECT_EQ(slot->column_id(), 0);
+    EXPECT_TRUE(slot->data_type()->equals(*i64_col.type));
+
+    // Execute the localized conjunct on the BIGINT values {11, 22}.
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt64>({11, 22}));
+    auto* conjunct = i64_req.conjuncts[0].get();
+    auto st = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(st.ok()) << st;
+    st = conjunct->open(&state);
+    ASSERT_TRUE(st.ok()) << st;
+    // Every row starts selected; the expectations below require row 0 dropped and row 1 kept.
+    IColumn::Filter row_mask(block.rows(), 1);
+    bool can_filter_all = false;
+    st = conjunct->execute_filter(&block, row_mask.data(), block.rows(), false, &can_filter_all);
+    ASSERT_TRUE(st.ok()) << st;
+    EXPECT_FALSE(can_filter_all);
+    ASSERT_EQ(row_mask.size(), 2);
+    EXPECT_EQ(row_mask[0], 0);
+    EXPECT_EQ(row_mask[1], 1);
+    conjunct->close();
+}
+
+// Scenario: localization preserves the table slot id; column_id follows the file block position.
+TEST_F(ColumnMapperCastTest, ColumnMapperKeepsTableSlotIdWhenFileBlockPositionChanges) {
+    auto table_col = name_col("value", i64());
+    auto file_col = name_col("value", i64(), 10);
+    std::vector<ColumnDefinition> projection {table_col};
+
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    ASSERT_TRUE(mapper.create_mapping(projection, {}, {file_col}).ok());
+
+    auto gt_pred = std::make_shared<Int64ChildGreaterThanExpr>(15);
+    gt_pred->add_child(VSlotRef::create_shared(7, 0, -1, table_col.type, "value"));
+    TableFilter filter;
+    filter.conjunct = VExprContext::create_shared(gt_pred);
+    filter.global_indices = {GlobalIndex(0)};
+
+    FileScanRequest first_req;
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &first_req, &state).ok());
+    ASSERT_EQ(first_req.conjuncts.size(), 1);
+    const auto* first_slot =
+            assert_cast<const VSlotRef*>(first_req.conjuncts[0]->root()->children()[0].get());
+    EXPECT_EQ(first_slot->slot_id(), 7);
+    EXPECT_EQ(first_slot->column_id(), 0);
+
+    // Second request: local column 9 sits at position 0, so local column 10 is at position 1.
+    FileScanRequest second_req;
+    second_req.local_positions.emplace(LocalColumnId(9), LocalIndex(0));
+    second_req.local_positions.emplace(LocalColumnId(10), LocalIndex(1));
+    second_req.non_predicate_columns.push_back(LocalColumnIndex::top_level(LocalColumnId(9)));
+    ASSERT_TRUE(mapper.localize_filters({filter}, {}, &second_req, &state).ok());
+    ASSERT_EQ(second_req.conjuncts.size(), 1);
+    const auto* second_slot =
+            assert_cast<const VSlotRef*>(second_req.conjuncts[0]->root()->children()[0].get());
+    EXPECT_EQ(second_slot->slot_id(), 7);
+    EXPECT_EQ(second_slot->column_id(), 1);
+
+    Block block;
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt64>({100, 100}));
+    block.insert(ColumnHelper::create_column_with_name<DataTypeInt64>({11, 22}));
+    auto* conjunct = second_req.conjuncts[0].get();
+    auto st = conjunct->prepare(&state, RowDescriptor());
+    ASSERT_TRUE(st.ok()) << st;
+    st = conjunct->open(&state);
+    ASSERT_TRUE(st.ok()) << st;
+    IColumn::Filter row_mask(block.rows(), 1);
+    bool can_filter_all = false;
+    st = conjunct->execute_filter(&block, row_mask.data(), block.rows(), false, &can_filter_all);
+    ASSERT_TRUE(st.ok()) << st;
+    EXPECT_FALSE(can_filter_all);
+    ASSERT_EQ(row_mask.size(), 2);
+    EXPECT_EQ(row_mask[0], 0);
+    EXPECT_EQ(row_mask[1], 1);
+    conjunct->close();
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/delimited_text/csv_reader_test.cpp b/be/test/format_v2/delimited_text/csv_reader_test.cpp
new file mode 100644
index 00000000000000..7c787de7f8c09a
--- /dev/null
+++ b/be/test/format_v2/delimited_text/csv_reader_test.cpp
@@ -0,0 +1,1070 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/csv_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+
+#include "common/consts.h"
+#include "common/object_pool.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/column_mapper.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+#include "testutil/desc_tbl_builder.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format::csv {
+namespace {
+
+TFileScanRangeParams csv_scan_params() {
+    TFileTextScanRangeParams text; // "," as column separator, "\n" as line delimiter
+    text.__set_column_separator(",");
+    text.__set_line_delimiter("\n");
+    TFileAttributes attrs;
+    attrs.__set_header_type(BeConsts::CSV_WITH_NAMES);
+    attrs.__set_text_params(std::move(text));
+    TFileScanRangeParams scan_params;
+    scan_params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN);
+    scan_params.__set_file_type(TFileType::FILE_LOCAL);
+    scan_params.__set_column_idxs({0, 1, 2});
+    scan_params.__set_file_attributes(std::move(attrs));
+    return scan_params;
+}
+
+std::unique_ptr<io::FileDescription> file_description(const std::string& path,
+                                                      int64_t range_start_offset = 0,
+                                                      int64_t range_size = -1) {
+    auto result = std::make_unique<io::FileDescription>();
+    result->file_size = static_cast<int64_t>(std::filesystem::file_size(path)); // on-disk size
+    result->range_size = range_size;
+    result->range_start_offset = range_start_offset;
+    result->path = path;
+    return result;
+}
+
+std::unique_ptr<io::FileDescription> unknown_size_file_description(const std::string& path) {
+    auto result = std::make_unique<io::FileDescription>();
+    result->file_size = -1; // size is left unknown on purpose; the file is not stat'ed here
+    result->range_size = -1;
+    result->range_start_offset = 0;
+    result->path = path;
+    return result;
+}
+
+std::vector<SlotDescriptor*> build_slots(ObjectPool* pool) {
+    // One tuple with three nullable slots, in this order: id Int32, name String, score Int32.
+    const auto int_type = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto str_type = make_nullable(std::make_shared<DataTypeString>());
+    DescriptorTblBuilder tbl_builder(pool);
+    tbl_builder.declare_tuple() << TupleDescBuilder::SlotType {int_type, "id"}
+                                << TupleDescBuilder::SlotType {str_type, "name"}
+                                << TupleDescBuilder::SlotType {int_type, "score"};
+    auto* descriptors = tbl_builder.build();
+    return descriptors->get_tuple_descriptor(0)->slots();
+}
+
+SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type,
+                               const std::string& name) {
+    TSlotDescriptor tslot;
+    tslot.__set_colName(name);
+    tslot.__set_id(slot_id);
+    tslot.__set_parent(0);
+    tslot.__set_slotType(type->to_thrift());
+    tslot.__set_slotIdx(slot_idx);
+    tslot.__set_columnPos(slot_idx);
+    tslot.__set_byteOffset(0);
+    tslot.__set_nullIndicatorByte(slot_idx / 8); // null flags: one bit per slot,
+    tslot.__set_nullIndicatorBit(slot_idx % 8);  // eight slots per byte
+    tslot.__set_isMaterialized(true);
+    return pool->add(new SlotDescriptor(tslot)); // pool->add is expected to take ownership
+}
+
+std::vector<SlotDescriptor*> build_struct_slots(ObjectPool* pool) {
+    // Layout (all nullable): id Int32, s Struct<a Int32, b Int32>, score Int32.
+    const auto int_type = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto s_type = make_nullable(
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, int_type}, Strings {"a", "b"}));
+    return {make_test_slot(pool, 0, 0, int_type, "id"), make_test_slot(pool, 1, 1, s_type, "s"),
+            make_test_slot(pool, 2, 2, int_type, "score")};
+}
+
+std::vector<SlotDescriptor*> build_nested_complex_slots(ObjectPool* pool) {
+    // Layout: id Int32, xs Array<Struct<a Int32, b String>>, kv Map<String, Struct<a, b>>;
+    // every level, including struct fields and map keys, is wrapped with make_nullable.
+    const auto int_type = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto str_type = make_nullable(std::make_shared<DataTypeString>());
+    const auto elem_type = make_nullable(
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, str_type}, Strings {"a", "b"}));
+    const auto xs_type = make_nullable(std::make_shared<DataTypeArray>(elem_type));
+    const auto kv_type = make_nullable(std::make_shared<DataTypeMap>(str_type, elem_type));
+    return {make_test_slot(pool, 0, 0, int_type, "id"), make_test_slot(pool, 1, 1, xs_type, "xs"),
+            make_test_slot(pool, 2, 2, kv_type, "kv")};
+}
+
+std::vector<SlotDescriptor*> build_char_varchar_slots(ObjectPool* pool) {
+    const auto city_type =
+            make_nullable(std::make_shared<DataTypeString>(3, PrimitiveType::TYPE_CHAR));
+    const auto country_type =
+            make_nullable(std::make_shared<DataTypeString>(4, PrimitiveType::TYPE_VARCHAR));
+    const auto region_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {city_type, country_type}, Strings {"city", "country"}));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, city_type, "city"), // same type as region's "city" field
+            make_test_slot(pool, 2, 2, region_type, "region")};
+}
+
+std::unique_ptr<CsvReader> create_reader(
+        const std::string& path, TFileScanRangeParams* params,
+        const std::vector<SlotDescriptor*>& slots, MockRuntimeState* state, RuntimeProfile* profile,
+        int64_t range_start_offset = 0, int64_t range_size = -1,
+        TFileCompressType::type range_compress_type = TFileCompressType::UNKNOWN,
+        std::shared_ptr<io::IOContext> io_ctx = nullptr) {
+    auto file_desc = file_description(path, range_start_offset, range_size);
+    auto fs_props = std::make_shared<io::FileSystemProperties>();
+    fs_props->system_type = TFileType::FILE_LOCAL;
+    auto csv_reader = std::make_unique<CsvReader>(fs_props, file_desc, std::move(io_ctx), profile,
+                                                  params, slots, range_compress_type);
+    EXPECT_TRUE(csv_reader->init(state).ok()); // failure is reported; reader still returned
+    return csv_reader;
+}
+
+std::unique_ptr<CsvReader> create_unknown_size_reader(const std::string& path,
+                                                      TFileScanRangeParams* params,
+                                                      const std::vector<SlotDescriptor*>& slots,
+                                                      MockRuntimeState* state,
+                                                      RuntimeProfile* profile) {
+    auto file_desc = unknown_size_file_description(path);
+    auto fs_props = std::make_shared<io::FileSystemProperties>();
+    fs_props->system_type = TFileType::FILE_LOCAL;
+    auto csv_reader =
+            std::make_unique<CsvReader>(fs_props, file_desc, nullptr, profile, params, slots);
+    EXPECT_TRUE(csv_reader->init(state).ok()); // failure is reported; reader still returned
+    return csv_reader;
+}
+
+Block make_block(const std::vector<ColumnDefinition>& schema,
+                 const std::vector<int32_t>& local_ids) {
+    Block block; // Gets one empty column per requested local id, in request order.
+    for (const auto local_id : local_ids) { // Unknown ids fail the test and are skipped.
+        const auto it = std::find_if(schema.begin(), schema.end(), [&](const auto& column) {
+            return column.local_id == local_id;
+        });
+        EXPECT_TRUE(it != schema.end()) << "local_id " << local_id << " is not in the schema";
+        if (it != schema.end()) block.insert({it->type->create_column(), it->type, it->name});
+    }
+    return block;
+}
+
+std::string nullable_string_at(const IColumn& column, size_t row) {
+    const auto& strings = assert_cast<const ColumnString&>(
+            assert_cast<const ColumnNullable&>(column).get_nested_column());
+    return strings.get_data_at(row).to_string(); // nested value; the null flag is not checked
+}
+
+bool is_null_at(const IColumn& column, size_t row) {
+    // `column` is expected to be a ColumnNullable; the cast below relies on that.
+    return assert_cast<const ColumnNullable&>(column).is_null_at(row);
+}
+
+int32_t nullable_int_at(const IColumn& column, size_t row) {
+    const auto& values = assert_cast<const ColumnInt32&>(
+            assert_cast<const ColumnNullable&>(column).get_nested_column());
+    return values.get_data()[row]; // nested value; the null flag is not checked
+}
+
+int32_t nullable_struct_int_child_at(const IColumn& column, size_t child_index, size_t row) {
+    // Unwraps Nullable(Struct(.., Nullable(Int32), ..)) and reads the child's nested value.
+    const auto& outer = assert_cast<const ColumnNullable&>(column);
+    const auto& fields = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    const auto& child = assert_cast<const ColumnNullable&>(fields.get_column(child_index));
+    const auto& values = assert_cast<const ColumnInt32&>(child.get_nested_column());
+    return values.get_data()[row];
+}
+
+int64_t counter_value(RuntimeProfile* profile, const std::string& name) {
+    auto* found = profile->get_counter(name);
+    EXPECT_NE(found, nullptr) << name; // a missing counter fails the test and reads as 0
+    return found != nullptr ? found->value() : 0;
+}
+
+// Test predicate: 1 for rows whose nullable Int32 input is non-null and greater than `value`.
+class NullableIntGreaterThanExpr final : public VExpr {
+public:
+    NullableIntGreaterThanExpr(size_t block_position, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _value(value) {}
+
+    const std::string& expr_name() const override { return _name; }
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& input =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& values = assert_cast<const ColumnInt32&>(input.get_nested_column());
+
+        auto flags = ColumnUInt8::create();
+        auto& out = flags->get_data();
+        out.resize(count);
+        for (size_t i = 0; i < count; ++i) {
+            // With a selector, output row i is computed from input row (*selector)[i].
+            const auto src = selector == nullptr ? i : (*selector)[i];
+            out[i] = !input.is_null_at(src) && values.get_element(src) > _value;
+        }
+        result_column = std::move(flags);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<NullableIntGreaterThanExpr>(_block_position, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position; // position of the input column inside the block
+    int32_t _value;         // exclusive lower bound for a row to pass
+    const std::string _name = "NullableIntGreaterThanExpr";
+};
+
+// Test-only predicate over a nullable struct column: true where both the struct and its nullable
+// Int32 child `child_index` are non-null and the child value is strictly greater than `value`.
+class StructIntChildGreaterThanExpr final : public VExpr {
+public:
+    StructIntChildGreaterThanExpr(size_t block_position, size_t child_index, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _child_index(child_index),
+              _value(value) {}
+
+    const std::string& expr_name() const override { return _name; }
+
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& source = *block->get_by_position(_block_position).column;
+        const auto& outer = assert_cast<const ColumnNullable&>(source);
+        const auto& fields = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+        const auto& child = assert_cast<const ColumnNullable&>(fields.get_column(_child_index));
+        const auto& child_values = assert_cast<const ColumnInt32&>(child.get_nested_column());
+
+        auto matches = ColumnUInt8::create();
+        auto& flags = matches->get_data();
+        flags.resize(count);
+        for (size_t dst_row = 0; dst_row < count; ++dst_row) {
+            // Both null maps are consulted before the child value is compared.
+            const auto src_row = selector == nullptr ? dst_row : (*selector)[dst_row];
+            const bool present = !outer.is_null_at(src_row) && !child.is_null_at(src_row);
+            flags[dst_row] = present && child_values.get_element(src_row) > _value;
+        }
+        result_column = std::move(matches);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<StructIntChildGreaterThanExpr>(_block_position,
+                                                                       _child_index, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    size_t _child_index;
+    int32_t _value;
+    const std::string _name = "StructIntChildGreaterThanExpr";
+};
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto conjunct = VExprContext::create_shared(expr);
+    const auto prepared = conjunct->prepare(state, RowDescriptor());
+    EXPECT_TRUE(prepared.ok()) << prepared;
+    const auto opened = conjunct->open(state); // attempted even if prepare failed
+    EXPECT_TRUE(opened.ok()) << opened;
+    return conjunct;
+}
+
+// Fixture: each test starts with a fresh scratch directory holding a header line plus two data
+// rows in reader.csv; the directory is removed again on teardown.
+class CsvV2ReaderTest : public testing::Test {
+public:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_csv_reader_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "reader.csv").string();
+        std::ofstream csv_file(_file_path, std::ios::binary);
+        csv_file << "id,name,score\n1,alice,10\n2,bob,20\n";
+        csv_file.close();
+        _slots = build_slots(&_pool);
+        _params = csv_scan_params();
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+protected:
+    ObjectPool _pool;
+    MockRuntimeState _state;
+    RuntimeProfile _profile {"csv_v2_reader_test"};
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+    std::vector<SlotDescriptor*> _slots;
+    TFileScanRangeParams _params;
+};
+
+// Scenario: CSV v2 builds its file-local schema from the FE-provided file slots. Every entry is
+// nullable and its local id is the CSV field ordinal taken from column_idxs.
+TEST_F(CsvV2ReaderTest, SchemaUsesSlotTypesAndColumnIdxs) {
+    auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 3);
+    EXPECT_EQ(file_schema[0].name, "id");
+    EXPECT_EQ(file_schema[0].local_id, 0);
+    EXPECT_TRUE(file_schema[0].type->is_nullable());
+    EXPECT_EQ(file_schema[1].name, "name");
+    EXPECT_EQ(file_schema[1].local_id, 1);
+    EXPECT_TRUE(file_schema[1].type->is_nullable());
+}
+
+// Scenario: the FE slot types seen by CSV are table target types, and a CSV file carries no
+// CHAR/VARCHAR length. The file schema therefore has to report bounded strings as unbounded
+// STRING; otherwise TableReader assumes the value already fits and skips truncation.
+TEST_F(CsvV2ReaderTest, SchemaTreatsCharVarcharSlotsAsUnboundedFileStrings) {
+    auto bounded_slots = build_char_varchar_slots(&_pool);
+    auto csv_reader = create_reader(_file_path, &_params, bounded_slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 3);
+
+    const auto city = remove_nullable(file_schema[1].type);
+    EXPECT_EQ(city->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(assert_cast<const DataTypeString*>(city.get())->len(), -1);
+
+    const auto region = remove_nullable(file_schema[2].type);
+    ASSERT_EQ(region->get_primitive_type(), TYPE_STRUCT);
+    const auto* region_fields = assert_cast<const DataTypeStruct*>(region.get());
+    ASSERT_EQ(region_fields->get_elements().size(), 2);
+    EXPECT_EQ(remove_nullable(region_fields->get_element(0))->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(region_fields->get_element(1))->get_primitive_type(), TYPE_STRING);
+    ASSERT_EQ(file_schema[2].children.size(), 2);
+    EXPECT_EQ(remove_nullable(file_schema[2].children[0].type)->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(file_schema[2].children[1].type)->get_primitive_type(), TYPE_STRING);
+}
+
+// Scenario: CSV is row-oriented, so predicate columns cannot be lazily read on their own. The
+// reader expresses that capability by picking MaterializedColumnMapper itself.
+TEST_F(CsvV2ReaderTest, CreatesMaterializedColumnMapper) {
+    auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto mapper = csv_reader->create_column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+
+    ASSERT_NE(dynamic_cast<MaterializedColumnMapper*>(mapper.get()), nullptr);
+}
+
+// Scenario: CSV v2 publishes delimited-text profile counters covering read, parse, deserialize,
+// and file-local conjunct filtering, so a scanner profile can show where row-reader time goes.
+TEST_F(CsvV2ReaderTest, ProfileCountersTrackReadParseDeserializeAndFilter) {
+    const auto profile_path = (_test_dir / "profile.csv").string();
+    std::ofstream csv_file(profile_path, std::ios::binary);
+    csv_file << "id,name,score\n";
+    csv_file << "\n";
+    csv_file << "1,alice,10\n";
+    csv_file << "2,bob,20\n";
+    csv_file.close();
+
+    _state._query_options.__set_read_csv_empty_line_as_null(true);
+    auto io_ctx = std::make_shared<io::IOContext>();
+    auto csv_reader = create_reader(profile_path, &_params, _slots, &_state, &_profile, 0, -1,
+                                    TFileCompressType::UNKNOWN, io_ctx);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                           LocalColumnIndex::top_level(LocalColumnId(2))};
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(2), LocalIndex(1));
+    scan_request->conjuncts = {
+            prepared_conjunct(&_state, std::make_shared<NullableIntGreaterThanExpr>(1, 15))};
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {0, 2});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(0).column, 0), 2);
+
+    EXPECT_NE(_profile.get_counter("OpenFileTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("CreateLineReaderTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("ReadLineTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("SplitLineTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("DeserializeTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("ConjunctFilterTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("DeleteConjunctFilterTime"), nullptr);
+    EXPECT_EQ(counter_value(&_profile, "RawLinesRead"), 3);
+    EXPECT_EQ(counter_value(&_profile, "RowsReadBeforeFilter"), 3);
+    EXPECT_EQ(counter_value(&_profile, "RowsFilteredByConjunct"), 2);
+    EXPECT_EQ(io_ctx->predicate_filtered_rows, 2);
+    EXPECT_EQ(counter_value(&_profile, "RowsFilteredByDeleteConjunct"), 0);
+    EXPECT_EQ(counter_value(&_profile, "RowsReturned"), 1);
+    EXPECT_EQ(counter_value(&_profile, "EmptyLinesRead"), 1);
+    EXPECT_EQ(counter_value(&_profile, "SkippedLines"), 1);
+    EXPECT_EQ(counter_value(&_profile, "CellsDeserialized"), 6);
+}
+
+// Scenario: a CSV file embeds no nested schema, yet TableColumnMapper still needs semantic
+// children for complex table columns. The reader derives ARRAY/MAP/STRUCT children from the slot
+// type and keeps the top-level local id equal to the CSV field ordinal from column_idxs.
+TEST_F(CsvV2ReaderTest, SchemaSynthesizesComplexChildrenForColumnMapper) {
+    _params.__set_column_idxs({4, 7, 9});
+    auto nested_slots = build_nested_complex_slots(&_pool);
+    auto csv_reader = create_reader(_file_path, &_params, nested_slots, &_state, &_profile);
+
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 3);
+
+    EXPECT_EQ(file_schema[1].name, "xs");
+    EXPECT_EQ(file_schema[1].local_id, 7);
+    ASSERT_EQ(file_schema[1].children.size(), 1);
+    EXPECT_EQ(file_schema[1].children[0].name, "element");
+    EXPECT_EQ(file_schema[1].children[0].local_id, 0);
+    ASSERT_EQ(file_schema[1].children[0].children.size(), 2);
+    EXPECT_EQ(file_schema[1].children[0].children[0].name, "a");
+    EXPECT_EQ(file_schema[1].children[0].children[0].local_id, 0);
+    EXPECT_EQ(file_schema[1].children[0].children[1].name, "b");
+    EXPECT_EQ(file_schema[1].children[0].children[1].local_id, 1);
+
+    EXPECT_EQ(file_schema[2].name, "kv");
+    EXPECT_EQ(file_schema[2].local_id, 9);
+    ASSERT_EQ(file_schema[2].children.size(), 2);
+    EXPECT_EQ(file_schema[2].children[0].name, "key");
+    EXPECT_EQ(file_schema[2].children[0].local_id, 0);
+    EXPECT_EQ(file_schema[2].children[1].name, "value");
+    EXPECT_EQ(file_schema[2].children[1].local_id, 1);
+    ASSERT_EQ(file_schema[2].children[1].children.size(), 2);
+    EXPECT_EQ(file_schema[2].children[1].children[0].name, "a");
+    EXPECT_EQ(file_schema[2].children[1].children[1].name, "b");
+}
+
+// Scenario: CSV v2 follows the local positions in FileScanRequest, which lets TableReader ask
+// for a subset of CSV fields in an order that differs from the physical field order.
+TEST_F(CsvV2ReaderTest, ReadsRequestedColumnsInFileLocalBlockOrder) {
+    auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)),
+                                           LocalColumnIndex::top_level(LocalColumnId(0))};
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {1, 0});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 2);
+    EXPECT_EQ(nullable_string_at(*batch.get_by_position(0).column, 0), "alice");
+    EXPECT_EQ(nullable_string_at(*batch.get_by_position(0).column, 1), "bob");
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(1).column, 0), 1);
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(1).column, 1), 2);
+}
+
+// Scenario: by default CSV v2 validates UTF-8 as strictly as the old query reader. Invalid bytes
+// must fail fast unless the scan params explicitly turn text UTF-8 validation off.
+TEST_F(CsvV2ReaderTest, InvalidUtf8FailsWhenValidationEnabled) {
+    const auto invalid_path = (_test_dir / "invalid_utf8.csv").string();
+    std::ofstream csv_file(invalid_path, std::ios::binary);
+    csv_file << "id,name,score\n";
+    csv_file << "1,";
+    csv_file.write("\xff", 1);
+    csv_file << ",10\n";
+    csv_file.close();
+
+    auto csv_reader = create_reader(invalid_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {1});
+    const auto st = csv_reader->get_block(&batch, &rows_read, &reached_eof);
+    EXPECT_FALSE(st.ok());
+    EXPECT_TRUE(st.to_string().find("Only support csv data in utf8 codec") != std::string::npos)
+            << st;
+}
+
+// Scenario: an external CSV scan can switch UTF-8 validation off with
+// `enable_text_validate_utf8=false`. The reader then keeps the original bytes rather than
+// rejecting the row.
+TEST_F(CsvV2ReaderTest, DisableTextValidateUtf8ReadsRawBytes) {
+    const auto invalid_path = (_test_dir / "invalid_utf8_disabled.csv").string();
+    std::ofstream csv_file(invalid_path, std::ios::binary);
+    csv_file << "id,name,score\n";
+    csv_file << "1,";
+    csv_file.write("\xff", 1);
+    csv_file << ",10\n";
+    csv_file.close();
+
+    _params.file_attributes.__set_enable_text_validate_utf8(false);
+    auto csv_reader = create_reader(invalid_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {1});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_string_at(*batch.get_by_position(0).column, 0), std::string("\xff", 1));
+}
+
+// Scenario: a file TVF may keep the logical format as FORMAT_CSV_PLAIN while the scan range
+// carries the real gzip compression. CSV v2 has to apply that range-level compression before
+// UTF-8 validation; otherwise the gzip bytes would be misread as CSV text.
+TEST_F(CsvV2ReaderTest, RangeCompressTypeGzipDecompressesPlainCsvFormat) {
+    const auto gz_path = (_test_dir / "reader.csv.gz").string();
+    static constexpr unsigned char gzipped_csv[] = {
+            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcb, 0x4c,
+            0xd1, 0xc9, 0x4b, 0xcc, 0x4d, 0xd5, 0x29, 0x4e, 0xce, 0x2f, 0x4a, 0xe5,
+            0x32, 0xd4, 0x49, 0xcc, 0xc9, 0x4c, 0x4e, 0xd5, 0x31, 0x34, 0xe0, 0x02,
+            0x00, 0x0b, 0xed, 0x5c, 0xa2, 0x19, 0x00, 0x00, 0x00};
+    std::ofstream gz_file(gz_path, std::ios::binary);
+    gz_file.write(reinterpret_cast<const char*>(gzipped_csv), sizeof(gzipped_csv));
+    gz_file.close();
+
+    _params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN);
+    _params.__isset.compress_type = false;
+    auto csv_reader = create_reader(gz_path, &_params, _slots, &_state, &_profile, 0, -1,
+                                    TFileCompressType::GZ);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                           LocalColumnIndex::top_level(LocalColumnId(1))};
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(1));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {0, 1});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(0).column, 0), 1);
+    EXPECT_EQ(nullable_string_at(*batch.get_by_position(1).column, 0), "alice");
+}
+
+// Scenario: FE column_idxs give the CSV field ordinal of each physical file slot. FE may reorder
+// projected file slots, making the mapping non-identity, so the reader has to go by the local id
+// in FileScanRequest rather than by the slot's position in the slot vector.
+TEST_F(CsvV2ReaderTest, ColumnIdxsMapSlotsToCsvOrdinals) {
+    const auto remap_path = (_test_dir / "remapped.csv").string();
+    std::ofstream csv_file(remap_path, std::ios::binary);
+    csv_file << "name,score,id\n";
+    csv_file << "alice,10,1\n";
+    csv_file.close();
+
+    _params.__set_column_idxs({2, 0, 1});
+    auto csv_reader = create_reader(remap_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 3);
+    EXPECT_EQ(file_schema[0].name, "id");
+    EXPECT_EQ(file_schema[0].local_id, 2);
+    EXPECT_EQ(file_schema[1].name, "name");
+    EXPECT_EQ(file_schema[1].local_id, 0);
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2)),
+                                           LocalColumnIndex::top_level(LocalColumnId(0))};
+    scan_request->local_positions.emplace(LocalColumnId(2), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {2, 0});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(0).column, 0), 1);
+    EXPECT_EQ(nullable_string_at(*batch.get_by_position(1).column, 0), "alice");
+}
+
+// Scenario: CSV keeps a complex column in a single text field, so v2 has to read the entire
+// struct field before it can evaluate a file-local predicate on one child. This covers scans of
+// the form `SELECT s.a WHERE s.b > 10` once CsvReader's MaterializedColumnMapper has requested
+// the full top-level `s`.
+TEST_F(CsvV2ReaderTest, FullStructColumnSupportsChildConjunctFiltering) {
+    const auto complex_path = (_test_dir / "complex.csv").string();
+    std::ofstream csv_file(complex_path, std::ios::binary);
+    csv_file << "id|s|score\n";
+    csv_file << "1|{\"a\": 11, \"b\": 5}|10\n";
+    csv_file << "2|{\"a\": 22, \"b\": 20}|20\n";
+    csv_file.close();
+
+    _params.file_attributes.text_params.__set_column_separator("|");
+    _params.__set_column_idxs({0, 1, 2});
+    auto struct_slots = build_struct_slots(&_pool);
+    auto csv_reader = create_reader(complex_path, &_params, struct_slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan_request->conjuncts = {prepared_conjunct(
+            &_state, std::make_shared<StructIntChildGreaterThanExpr>(
+                             /*block_position=*/0, /*child_index=*/1, /*value=*/10))};
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {1});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_struct_int_child_at(*batch.get_by_position(0).column, 0, 0), 22);
+    EXPECT_EQ(nullable_struct_int_child_at(*batch.get_by_position(0).column, 1, 0), 20);
+}
+
+// Scenario: a table-level scan may need only partition/default columns, so the CSV
+// FileScanRequest names no file-local columns. The row count must still be reported.
+TEST_F(CsvV2ReaderTest, EmptyFileLocalProjectionStillReportsRows) {
+    auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto scan_request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    Block batch;
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    EXPECT_EQ(rows_read, 2);
+    EXPECT_FALSE(reached_eof);
+}
+
+// Scenario: stream-load/http_stream inputs have neither a known split size nor a known file size.
+// The first split must still be read to EOF rather than rejected before the stream is opened.
+TEST_F(CsvV2ReaderTest, UnknownFirstSplitSizeReadsUntilEof) {
+    auto csv_reader = create_unknown_size_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                           LocalColumnIndex::top_level(LocalColumnId(1))};
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(1));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {0, 1});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 2);
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(0).column, 0), 1);
+    EXPECT_EQ(nullable_string_at(*batch.get_by_position(1).column, 1), "bob");
+}
+
+// Scenario: stream load/http_stream CSV input has no filesystem behind it. When TableReader
+// does not pass the stream load id through, the v2 reader should say so directly rather than
+// fall into the generic FileFactory path and return "unsupported file reader type: 2".
+TEST_F(CsvV2ReaderTest, StreamInputRequiresLoadIdBeforeOpeningPipe) {
+    _params.__set_file_type(TFileType::FILE_STREAM);
+    auto csv_reader = create_unknown_size_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    const auto open_status = csv_reader->open(scan_request);
+
+    ASSERT_FALSE(open_status.ok());
+    EXPECT_NE(open_status.to_string().find("stream reader requires load id"), std::string::npos)
+            << open_status;
+}
+
+// Scenario: CSV has no footer with a row count, so v2 COUNT pushdown scans the split and hands
+// the resulting row count back through FileAggregateResult.
+TEST_F(CsvV2ReaderTest, CountAggregateScansRows) {
+    auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto scan_request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    FileAggregateRequest count_request;
+    count_request.agg_type = TPushAggOp::type::COUNT;
+    FileAggregateResult count_result;
+    ASSERT_TRUE(csv_reader->get_aggregate_result(count_request, &count_result).ok());
+    EXPECT_EQ(count_result.count, 2);
+}
+
+// Scenario: CSV v2 parses enclosed fields on its own rather than delegating to the old
+// CsvReader. A separator that appears inside an enclosed string stays part of that field.
+TEST_F(CsvV2ReaderTest, EnclosedFieldKeepsSeparatorInsideStringValue) {
+    const auto quoted_path = (_test_dir / "quoted.csv").string();
+    std::ofstream csv_file(quoted_path, std::ios::binary);
+    csv_file << "id,name,score\n";
+    csv_file << "1,\"alice,team\",10\n";
+    csv_file.close();
+
+    _params.file_attributes.text_params.__set_enclose('"');
+    _params.file_attributes.text_params.__set_escape('\\');
+    auto csv_reader = create_reader(quoted_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {1});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_string_at(*batch.get_by_position(0).column, 0), "alice,team");
+}
+
+// Scenario: if a CSV row has fewer fields than the FE-provided file slot list, v2 fills the
+// missing requested field with NULL rather than failing or shifting later columns.
+TEST_F(CsvV2ReaderTest, MissingRequestedFieldUsesNullFormat) {
+    const auto missing_path = (_test_dir / "missing.csv").string();
+    std::ofstream csv_file(missing_path, std::ios::binary);
+    csv_file << "id,name,score\n";
+    csv_file << "1,alice\n";
+    csv_file.close();
+
+    auto csv_reader = create_reader(missing_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2))};
+    scan_request->local_positions.emplace(LocalColumnId(2), LocalIndex(0));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {2});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_TRUE(is_null_at(*batch.get_by_position(0).column, 0));
+}
+
+// Scenario: the first line may start with a UTF-8 BOM, and CSV_WITH_NAMES_AND_TYPES carries two
+// header records. Both must be skipped before the first data row is materialized.
+TEST_F(CsvV2ReaderTest, HeaderNamesAndTypesSkipsTwoLinesAndBom) {
+    const auto header_path = (_test_dir / "header_names_types.csv").string();
+    std::ofstream csv_file(header_path, std::ios::binary);
+    csv_file.write("\xEF\xBB\xBF", 3);
+    csv_file << "id,name,score\n";
+    csv_file << "INT,STRING,INT\n";
+    csv_file << "7,carol,70\n";
+    csv_file.close();
+
+    _params.file_attributes.__set_header_type(BeConsts::CSV_WITH_NAMES_AND_TYPES);
+    auto csv_reader = create_reader(header_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {0});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(0).column, 0), 7);
+}
+
+// Scenario: if the first returned data line begins with a UTF-8 BOM, CSV v2 strips it before the
+// cell reaches the serde. This matters for headerless files whose first column is numeric.
+TEST_F(CsvV2ReaderTest, BomIsRemovedFromFirstDataLineWithoutHeader) {
+    const auto bom_path = (_test_dir / "bom_data.csv").string();
+    std::ofstream csv_file(bom_path, std::ios::binary);
+    csv_file.write("\xEF\xBB\xBF", 3);
+    csv_file << "5,bom,50\n";
+    csv_file.close();
+
+    _params.file_attributes.__isset.header_type = false;
+    auto csv_reader = create_reader(bom_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {0});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(0).column, 0), 5);
+}
+
+// Scenario: with header_type left unset by FE, CSV v2 has to honor skip_lines exactly like the
+// old reader.
+TEST_F(CsvV2ReaderTest, SkipLinesUsedWhenHeaderTypeUnset) {
+    const auto skip_path = (_test_dir / "skip_lines.csv").string();
+    std::ofstream csv_file(skip_path, std::ios::binary);
+    csv_file << "skip me\n";
+    csv_file << "skip me too\n";
+    csv_file << "3,dan,30\n";
+    csv_file.close();
+
+    _params.file_attributes.__isset.header_type = false;
+    _params.file_attributes.__set_skip_lines(2);
+    auto csv_reader = create_reader(skip_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {0});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 1);
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(0).column, 0), 3);
+}
+
+// Scenario: by default empty physical lines are skipped; with read_csv_empty_line_as_null set,
+// one empty line becomes one all-null logical row.
+TEST_F(CsvV2ReaderTest, EmptyLineAsNullWhenQueryOptionEnabled) {
+    const auto empty_line_path = (_test_dir / "empty_line.csv").string();
+    std::ofstream csv_file(empty_line_path, std::ios::binary);
+    csv_file << "id,name,score\n";
+    csv_file << "\n";
+    csv_file << "4,erin,40\n";
+    csv_file.close();
+
+    _state._query_options.__set_read_csv_empty_line_as_null(true);
+    auto csv_reader = create_reader(empty_line_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t rows_read = 0;
+    auto batch = make_block(file_schema, {0});
+    ASSERT_TRUE(csv_reader->get_block(&batch, &rows_read, &reached_eof).ok());
+    ASSERT_EQ(rows_read, 2);
+    EXPECT_TRUE(is_null_at(*batch.get_by_position(0).column, 0));
+    EXPECT_EQ(nullable_int_at(*batch.get_by_position(0).column, 1), 4);
+}
+
+// Scenario: FE-provided CSV text parameters define NULL semantics. Explicit null_format and
+// empty_field_as_null should both produce nullable values without throwing serde errors.
+TEST_F(CsvV2ReaderTest, NullFormatAndEmptyFieldAsNullProduceNullableValues) {
+    const std::string csv_path = (_test_dir / "null_format.csv").string();
+    std::ofstream writer(csv_path, std::ios::binary);
+    writer << "id,name,score\n"
+           << "1,NULL,\n"; // name is the null marker, score is an empty field
+    writer.close();
+
+    _params.file_attributes.text_params.__set_empty_field_as_null(true);
+    _params.file_attributes.text_params.__set_null_format("NULL");
+    auto csv_reader = create_reader(csv_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(2), LocalIndex(1));
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)),
+                                           LocalColumnIndex::top_level(LocalColumnId(2))};
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t row_count = 0;
+    auto out_block = make_block(file_schema, {1, 2});
+    ASSERT_TRUE(csv_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_TRUE(is_null_at(*out_block.get_by_position(0).column, 0));
+    EXPECT_TRUE(is_null_at(*out_block.get_by_position(1).column, 0));
+}
+
+// Scenario: OpenCSV keeps an empty field as an empty string when empty_field_as_null is false,
+// even if FE passes an empty null_format. This differs from Hive text serde, where an empty
+// serialization.null.format is a real NULL marker.
+TEST_F(CsvV2ReaderTest, EmptyNullFormatKeepsCsvEmptyFieldAsEmptyString) {
+    const std::string csv_path = (_test_dir / "empty_null_format.csv").string();
+    std::ofstream writer(csv_path, std::ios::binary);
+    writer << "id,name,score\n"
+           << "1,alice,10\n"
+           << "2,,20\n" // empty name field
+           << "3,NULL,30\n";
+    writer.close();
+
+    _params.file_attributes.text_params.__set_empty_field_as_null(false);
+    _params.file_attributes.text_params.__set_null_format("");
+    auto csv_reader = create_reader(csv_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+    // No row may be NULL: the empty field and the literal text NULL are ordinary strings here.
+    bool reached_eof = false;
+    size_t row_count = 0;
+    auto out_block = make_block(file_schema, {1});
+    ASSERT_TRUE(csv_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    ASSERT_EQ(row_count, 3);
+    EXPECT_FALSE(is_null_at(*out_block.get_by_position(0).column, 0));
+    EXPECT_FALSE(is_null_at(*out_block.get_by_position(0).column, 1));
+    EXPECT_EQ(nullable_string_at(*out_block.get_by_position(0).column, 1), "");
+    EXPECT_FALSE(is_null_at(*out_block.get_by_position(0).column, 2));
+    EXPECT_EQ(nullable_string_at(*out_block.get_by_position(0).column, 2), "NULL");
+}
+
+// Scenario: a non-first split starts inside a record. CSV v2 pre-reads enough delimiter bytes and
+// skips the partial first line so the split begins at the next complete row.
+TEST_F(CsvV2ReaderTest, NonFirstSplitSkipsPartialFirstRecord) {
+    const std::string csv_path = (_test_dir / "split.csv").string();
+    std::ofstream writer(csv_path, std::ios::binary);
+    writer << "1,skip,10\n"
+           << "2,bob,20\n";
+    writer.close();
+
+    constexpr int kSplitStart = 3; // byte 3 lies inside the first record "1,skip,10"
+    _params.file_attributes.__isset.header_type = false;
+    auto csv_reader = create_reader(csv_path, &_params, _slots, &_state, &_profile, kSplitStart);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(csv_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t row_count = 0;
+    auto out_block = make_block(file_schema, {0});
+    ASSERT_TRUE(csv_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_int_at(*out_block.get_by_position(0).column, 0), 2);
+}
+
+// Scenario: compressed CSV cannot be split at arbitrary byte offsets because the decompressor needs
+// the stream from the beginning. V2 should reject such a split before constructing the line reader.
+TEST_F(CsvV2ReaderTest, NonFirstCompressedSplitReturnsError) {
+    _params.file_attributes.__isset.header_type = false;
+    _params.__set_format_type(TFileFormatType::FORMAT_CSV_GZ);
+    auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile,
+                                    /*range_start_offset=*/1);
+    // open() is the call expected to reject the non-zero start offset of a compressed file.
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    EXPECT_FALSE(csv_reader->open(scan_request).ok());
+}
+
+// Scenario: FileScanRequest is a TableReader-to-FileReader contract. Unknown CSV ordinals,
+// out-of-range block positions, and sparse block-position maps must fail during reader open.
+TEST_F(CsvV2ReaderTest, InvalidScanRequestReturnsError) {
+    { // Column id 99 is requested.
+        auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto scan_request = std::make_shared<FileScanRequest>();
+        scan_request->local_positions.emplace(LocalColumnId(99), LocalIndex(0));
+        scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(99))};
+        EXPECT_FALSE(csv_reader->open(scan_request).ok());
+    }
+    { // A single requested column is mapped to block position 2.
+        auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto scan_request = std::make_shared<FileScanRequest>();
+        scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(2));
+        scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+        EXPECT_FALSE(csv_reader->open(scan_request).ok());
+    }
+    { // Two requested columns are both mapped to block position 0.
+        auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        auto scan_request = std::make_shared<FileScanRequest>();
+        scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+        scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+        scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                               LocalColumnIndex::top_level(LocalColumnId(1))};
+        EXPECT_FALSE(csv_reader->open(scan_request).ok());
+    }
+}
+
+// Scenario: CSV v2 can count rows by scanning, but it cannot answer min/max or mixed aggregate
+// requests from metadata.
+TEST_F(CsvV2ReaderTest, UnsupportedAggregateReturnsNotSupported) {
+    auto csv_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto scan_request = std::make_shared<FileScanRequest>();
+    ASSERT_TRUE(csv_reader->open(scan_request).ok());
+    // Ask for a MINMAX push-down; the call has to come back with a non-OK status.
+    FileAggregateResult result;
+    FileAggregateRequest minmax_request;
+    minmax_request.agg_type = TPushAggOp::type::MINMAX;
+    EXPECT_FALSE(csv_reader->get_aggregate_result(minmax_request, &result).ok());
+}
+
+} // namespace
+} // namespace doris::format::csv
diff --git a/be/test/format_v2/delimited_text/text_reader_test.cpp b/be/test/format_v2/delimited_text/text_reader_test.cpp
new file mode 100644
index 00000000000000..b6402cab5d86d6
--- /dev/null
+++ b/be/test/format_v2/delimited_text/text_reader_test.cpp
@@ -0,0 +1,965 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/delimited_text/text_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+
+#include "common/consts.h"
+#include "common/object_pool.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/column_mapper.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+#include "testutil/desc_tbl_builder.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format::text {
+namespace {
+
+TFileScanRangeParams text_scan_params() {
+    TFileTextScanRangeParams text_layout;
+    text_layout.__set_column_separator(","); // fields are comma separated
+    text_layout.__set_line_delimiter("\n");  // one record per line
+    text_layout.__set_escape('\\');          // backslash is the escape character
+    TFileAttributes file_attributes;
+    file_attributes.__set_text_params(std::move(text_layout));
+    TFileScanRangeParams scan_params;
+    scan_params.__set_format_type(TFileFormatType::FORMAT_TEXT);
+    scan_params.__set_file_type(TFileType::FILE_LOCAL);
+    scan_params.__set_file_attributes(std::move(file_attributes));
+    scan_params.__set_column_idxs({0, 1, 2}); // default: slot i maps to text field i
+    return scan_params;
+}
+
+std::unique_ptr<io::FileDescription> file_description(const std::string& path,
+                                                      int64_t range_start_offset = 0,
+                                                      int64_t range_size = -1) {
+    auto description = std::make_unique<io::FileDescription>();
+    description->path = path; // must already exist: its on-disk size is read below
+    description->file_size = static_cast<int64_t>(std::filesystem::file_size(path));
+    description->range_start_offset = range_start_offset;
+    description->range_size = range_size;
+    return description;
+}
+
+std::vector<SlotDescriptor*> build_slots(ObjectPool* pool) {
+    using SlotType = TupleDescBuilder::SlotType;
+    const auto nullable_int = [] { return make_nullable(std::make_shared<DataTypeInt32>()); };
+    const auto nullable_string = [] { return make_nullable(std::make_shared<DataTypeString>()); };
+    // Tuple layout: (id INT, name STRING, score INT); each slot gets its own nullable type.
+    DescriptorTblBuilder tbl_builder(pool);
+    tbl_builder.declare_tuple() << SlotType {nullable_int(), "id"}
+                                << SlotType {nullable_string(), "name"}
+                                << SlotType {nullable_int(), "score"};
+    return tbl_builder.build()->get_tuple_descriptor(0)->slots();
+}
+
+SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type,
+                               const std::string& name) {
+    TSlotDescriptor thrift_slot;
+    thrift_slot.__set_id(slot_id);
+    thrift_slot.__set_colName(name);
+    thrift_slot.__set_slotType(type->to_thrift());
+    thrift_slot.__set_parent(0); // parent id is always 0 for these test slots
+    thrift_slot.__set_slotIdx(slot_idx);
+    thrift_slot.__set_columnPos(slot_idx);
+    thrift_slot.__set_nullIndicatorByte(slot_idx / 8); // null indicator position is derived
+    thrift_slot.__set_nullIndicatorBit(slot_idx % 8);  // from the slot index: byte, then bit
+    thrift_slot.__set_byteOffset(0);
+    thrift_slot.__set_isMaterialized(true);
+    return pool->add(new SlotDescriptor(thrift_slot)); // the descriptor is handed to `pool`
+}
+
+std::vector<SlotDescriptor*> build_struct_slots(ObjectPool* pool) {
+    const auto field_type = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto s_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {field_type, field_type}, Strings {"a", "b"})); // STRUCT<a INT, b INT>
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, s_type, "s"),
+            make_test_slot(pool, 2, 2, make_nullable(std::make_shared<DataTypeInt32>()), "score")};
+}
+
+std::vector<SlotDescriptor*> build_nested_complex_slots(ObjectPool* pool) {
+    const auto int_type = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto string_type = make_nullable(std::make_shared<DataTypeString>());
+    // STRUCT<a INT, b STRING> is both the array element type and the map value type.
+    const auto element_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, string_type}, Strings {"a", "b"}));
+    const auto xs_type = make_nullable(std::make_shared<DataTypeArray>(element_type));
+    const auto kv_type = make_nullable(std::make_shared<DataTypeMap>(string_type, element_type));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, xs_type, "xs"),
+            make_test_slot(pool, 2, 2, kv_type, "kv")};
+}
+
+std::vector<SlotDescriptor*> build_char_varchar_slots(ObjectPool* pool) {
+    const auto char3_type =
+            make_nullable(std::make_shared<DataTypeString>(3, PrimitiveType::TYPE_CHAR));
+    const auto varchar4_type =
+            make_nullable(std::make_shared<DataTypeString>(4, PrimitiveType::TYPE_VARCHAR));
+    const auto region_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {char3_type, varchar4_type}, Strings {"city", "country"}));
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, char3_type, "city"),      // top-level CHAR(3)
+            make_test_slot(pool, 2, 2, region_type, "region")}; // STRUCT<CHAR(3), VARCHAR(4)>
+}
+
+std::unique_ptr<TextReader> create_reader(const std::string& path, TFileScanRangeParams* params,
+                                          const std::vector<SlotDescriptor*>& slots,
+                                          MockRuntimeState* state, RuntimeProfile* profile,
+                                          int64_t range_start_offset = 0, int64_t range_size = -1,
+                                          std::shared_ptr<io::IOContext> io_ctx = nullptr) {
+    auto file_desc = file_description(path, range_start_offset, range_size);
+    auto local_fs = std::make_shared<io::FileSystemProperties>();
+    local_fs->system_type = TFileType::FILE_LOCAL;
+    auto reader = std::make_unique<TextReader>(local_fs, file_desc, std::move(io_ctx), profile,
+                                               params, slots);
+    EXPECT_TRUE(reader->init(state).ok()); // non-fatal: the reader is returned either way
+    return reader;
+}
+
+Block make_block(const std::vector<ColumnDefinition>& schema,
+                 const std::vector<int32_t>& local_ids) {
+    Block block; // receives one empty column per requested local id, in request order
+    for (const auto local_id : local_ids) {
+        const auto has_id = [local_id](const auto& column) { return column.local_id == local_id; };
+        const auto it = std::find_if(schema.begin(), schema.end(), has_id);
+        // Stop on an unknown id: a non-fatal EXPECT_* would fall through to `it->` on end().
+        DORIS_CHECK(it != schema.end());
+        block.insert({it->type->create_column(), it->type, it->name});
+    }
+    return block;
+}
+
+std::string nullable_string_at(const IColumn& column, size_t row) {
+    const auto& strings = assert_cast<const ColumnString&>(
+            assert_cast<const ColumnNullable&>(column).get_nested_column());
+    return strings.get_data_at(row).to_string(); // nested payload; the null map is not checked
+}
+
+int32_t nullable_int_at(const IColumn& column, size_t row) {
+    const auto& ints = assert_cast<const ColumnInt32&>(
+            assert_cast<const ColumnNullable&>(column).get_nested_column());
+    return ints.get_data()[row]; // nested payload; the null map is not checked
+}
+
+bool is_null_at(const IColumn& column, size_t row) {
+    // `column` is cast to ColumnNullable first, so callers must pass a Nullable column.
+    return assert_cast<const ColumnNullable&>(column).is_null_at(row);
+}
+
+int32_t nullable_struct_int_child_at(const IColumn& column, size_t child_index, size_t row) {
+    // Peel the layers: Nullable(Struct) -> child `child_index` -> Nullable(Int32) -> Int32 data.
+    const auto& outer = assert_cast<const ColumnNullable&>(column);
+    const auto& fields = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    const auto& child = assert_cast<const ColumnNullable&>(fields.get_column(child_index));
+    const auto& values = assert_cast<const ColumnInt32&>(child.get_nested_column());
+    return values.get_data()[row];
+}
+
+int64_t counter_value(RuntimeProfile* profile, const std::string& name) {
+    auto* found = profile->get_counter(name);
+    EXPECT_NE(found, nullptr) << name;
+    return found != nullptr ? found->value() : 0; // 0 stands in for a missing counter
+}
+
+// Test-only predicate: true for rows where the Nullable(Int32) column at `block_position`
+// is non-null and strictly greater than `value`.
+class NullableIntGreaterThanExpr final : public VExpr {
+public:
+    NullableIntGreaterThanExpr(size_t block_position, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _value(value) {}
+    const std::string& expr_name() const override { return _name; }
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& input =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& values = assert_cast<const ColumnInt32&>(input.get_nested_column());
+        const bool has_selector = selector != nullptr;
+        auto matches = ColumnUInt8::create();
+        auto& flags = matches->get_data();
+        flags.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            // With a selector, output row `row` is computed from input row (*selector)[row].
+            const auto source_row = has_selector ? (*selector)[row] : row;
+            flags[row] = !input.is_null_at(source_row) && values.get_element(source_row) > _value;
+        }
+        result_column = std::move(matches);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<NullableIntGreaterThanExpr>(_block_position, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    int32_t _value;
+    const std::string _name = "NullableIntGreaterThanExpr";
+};
+
+// Test-only predicate over a Nullable(Struct) column: true when the struct and its child
+// `child_index` (a Nullable(Int32)) are both non-null and the child value exceeds `value`.
+class StructIntChildGreaterThanExpr final : public VExpr {
+public:
+    StructIntChildGreaterThanExpr(size_t block_position, size_t child_index, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _child_index(child_index),
+              _value(value) {}
+    const std::string& expr_name() const override { return _name; }
+    bool is_constant() const override { return false; }
+
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        // Peel the layers: Nullable(Struct) -> child column -> Nullable(Int32) -> Int32 data.
+        const auto& outer =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& fields = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+        const auto& child = assert_cast<const ColumnNullable&>(fields.get_column(_child_index));
+        const auto& values = assert_cast<const ColumnInt32&>(child.get_nested_column());
+        const bool has_selector = selector != nullptr;
+
+        auto matches = ColumnUInt8::create();
+        auto& flags = matches->get_data();
+        flags.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            // With a selector, output row `row` is computed from input row (*selector)[row].
+            const auto source_row = has_selector ? (*selector)[row] : row;
+            const bool present = !outer.is_null_at(source_row) && !child.is_null_at(source_row);
+            flags[row] = present && values.get_element(source_row) > _value;
+        }
+        result_column = std::move(matches);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<StructIntChildGreaterThanExpr>(_block_position,
+                                                                       _child_index, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    size_t _child_index;
+    int32_t _value;
+    const std::string _name = "StructIntChildGreaterThanExpr";
+};
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto conjunct_ctx = VExprContext::create_shared(expr);
+    const auto prepare_status = conjunct_ctx->prepare(state, RowDescriptor());
+    EXPECT_TRUE(prepare_status.ok()) << prepare_status;
+    const auto open_status = conjunct_ctx->open(state); // attempted even if prepare failed
+    EXPECT_TRUE(open_status.ok()) << open_status;
+    return conjunct_ctx;
+}
+
+// Fixture: a scratch directory holding a two-row text file, plus default slots and scan params.
+class TextV2ReaderTest : public testing::Test {
+public:
+    void SetUp() override {
+        _params = text_scan_params();
+        _slots = build_slots(&_pool);
+        _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_text_reader_test";
+        std::filesystem::remove_all(_test_dir); // start from an empty directory
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "reader.text").string();
+        std::ofstream writer(_file_path, std::ios::binary);
+        writer << "1,alice,10\n"
+               << "2,bob,20\n";
+        writer.close();
+    }
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+protected:
+    ObjectPool _pool;
+    MockRuntimeState _state;
+    RuntimeProfile _profile {"text_v2_reader_test"};
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+    std::vector<SlotDescriptor*> _slots;
+    TFileScanRangeParams _params;
+};
+
+// Scenario: Text v2 exposes FE-provided file slots as nullable file-local schema using column_idxs
+// as Hive text field ordinals.
+TEST_F(TextV2ReaderTest, SchemaUsesSlotTypesAndColumnIdxs) {
+    auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 3);
+    EXPECT_EQ(file_schema[0].local_id, 0);
+    EXPECT_EQ(file_schema[0].name, "id");
+    EXPECT_TRUE(file_schema[0].type->is_nullable());
+    EXPECT_EQ(file_schema[1].local_id, 1);
+    EXPECT_EQ(file_schema[1].name, "name");
+    EXPECT_TRUE(file_schema[1].type->is_nullable());
+}
+
+// Scenario: FE slot types for Hive text are table target types. CHAR/VARCHAR length is not stored
+// in the text file, so the file schema must expose bounded strings as unbounded STRING. Otherwise
+// TableReader believes the file value already satisfies the table length and skips truncation.
+TEST_F(TextV2ReaderTest, SchemaTreatsCharVarcharSlotsAsUnboundedFileStrings) {
+    auto slots = build_char_varchar_slots(&_pool);
+    auto reader = create_reader(_file_path, &_params, slots, &_state, &_profile);
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+    // The type check is fatal because the assert_cast below is only valid for a string type.
+    const auto city_type = remove_nullable(schema[1].type);
+    ASSERT_EQ(city_type->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(assert_cast<const DataTypeString*>(city_type.get())->len(), -1);
+    // Struct children are checked twice: in the struct data type and in the child definitions.
+    const auto region_type = remove_nullable(schema[2].type);
+    ASSERT_EQ(region_type->get_primitive_type(), TYPE_STRUCT);
+    const auto* region_struct = assert_cast<const DataTypeStruct*>(region_type.get());
+    ASSERT_EQ(region_struct->get_elements().size(), 2);
+    EXPECT_EQ(remove_nullable(region_struct->get_element(0))->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(region_struct->get_element(1))->get_primitive_type(), TYPE_STRING);
+    ASSERT_EQ(schema[2].children.size(), 2);
+    EXPECT_EQ(remove_nullable(schema[2].children[0].type)->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(schema[2].children[1].type)->get_primitive_type(), TYPE_STRING);
+}
+
+// Scenario: Hive text is row-oriented and cannot lazy-read predicate columns separately. The
+// reader declares that capability by choosing MaterializedColumnMapper itself.
+TEST_F(TextV2ReaderTest, CreatesMaterializedColumnMapper) {
+    auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    auto mapper = text_reader->create_column_mapper({.mode = TableColumnMappingMode::BY_NAME});
+    // The concrete mapper type is the contract: it must be a MaterializedColumnMapper.
+    ASSERT_NE(dynamic_cast<MaterializedColumnMapper*>(mapper.get()), nullptr);
+}
+
+// Scenario: Text v2 exposes delimited-text profile counters for read, parse, deserialize, and
+// file-local conjunct filtering, so scanner profiles can explain where row-reader time is spent.
+TEST_F(TextV2ReaderTest, ProfileCountersTrackReadParseDeserializeAndFilter) {
+    const std::string text_path = (_test_dir / "profile.text").string();
+    std::ofstream writer(text_path, std::ios::binary);
+    writer << "\n" // blank line; it is what EmptyLinesRead counts below
+           << "1,alice,10\n"
+           << "2,bob,20\n";
+    writer.close();
+
+    _state._query_options.__set_read_csv_empty_line_as_null(true);
+    auto io_ctx = std::make_shared<io::IOContext>();
+    auto reader = create_reader(text_path, &_params, _slots, &_state, &_profile, 0, -1, io_ctx);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(2), LocalIndex(1));
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                           LocalColumnIndex::top_level(LocalColumnId(2))};
+    scan_request->conjuncts = {
+            prepared_conjunct(&_state, std::make_shared<NullableIntGreaterThanExpr>(1, 15))};
+    ASSERT_TRUE(reader->open(scan_request).ok());
+    // The conjunct keeps rows whose block position 1 (score) is non-null and above 15.
+    bool reached_eof = false;
+    size_t row_count = 0;
+    auto out_block = make_block(file_schema, {0, 2});
+    ASSERT_TRUE(reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_int_at(*out_block.get_by_position(0).column, 0), 2);
+    // Timers only have to exist; the row and cell counters must match the three-line file.
+    EXPECT_NE(_profile.get_counter("OpenFileTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("CreateLineReaderTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("ReadLineTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("SplitLineTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("DeserializeTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("ConjunctFilterTime"), nullptr);
+    EXPECT_NE(_profile.get_counter("DeleteConjunctFilterTime"), nullptr);
+    EXPECT_EQ(counter_value(&_profile, "RawLinesRead"), 3);
+    EXPECT_EQ(counter_value(&_profile, "RowsReadBeforeFilter"), 3);
+    EXPECT_EQ(counter_value(&_profile, "RowsFilteredByConjunct"), 2);
+    EXPECT_EQ(io_ctx->predicate_filtered_rows, 2);
+    EXPECT_EQ(counter_value(&_profile, "RowsFilteredByDeleteConjunct"), 0);
+    EXPECT_EQ(counter_value(&_profile, "RowsReturned"), 1);
+    EXPECT_EQ(counter_value(&_profile, "EmptyLinesRead"), 1);
+    EXPECT_EQ(counter_value(&_profile, "SkippedLines"), 0);
+    EXPECT_EQ(counter_value(&_profile, "CellsDeserialized"), 6);
+}
+
+// Scenario: Hive text has no embedded nested schema, but TableColumnMapper still needs semantic
+// children for complex table columns. The reader synthesizes ARRAY/MAP/STRUCT children from the
+// slot type while keeping the top-level local id as the text field ordinal from column_idxs.
+TEST_F(TextV2ReaderTest, SchemaSynthesizesComplexChildrenForColumnMapper) {
+    auto slots = build_nested_complex_slots(&_pool);
+    _params.__set_column_idxs({4, 7, 9}); // non-contiguous text field ordinals
+    auto text_reader = create_reader(_file_path, &_params, slots, &_state, &_profile);
+
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 3);
+    const auto& xs = file_schema[1]; // ARRAY<STRUCT<a, b>>
+    EXPECT_EQ(xs.name, "xs");
+    EXPECT_EQ(xs.local_id, 7);
+    ASSERT_EQ(xs.children.size(), 1);
+    EXPECT_EQ(xs.children[0].name, "element");
+    EXPECT_EQ(xs.children[0].local_id, 0);
+    ASSERT_EQ(xs.children[0].children.size(), 2);
+    EXPECT_EQ(xs.children[0].children[0].name, "a");
+    EXPECT_EQ(xs.children[0].children[0].local_id, 0);
+    EXPECT_EQ(xs.children[0].children[1].name, "b");
+    EXPECT_EQ(xs.children[0].children[1].local_id, 1);
+    const auto& kv = file_schema[2]; // MAP<STRING, STRUCT<a, b>>
+    EXPECT_EQ(kv.name, "kv");
+    EXPECT_EQ(kv.local_id, 9);
+    ASSERT_EQ(kv.children.size(), 2);
+    EXPECT_EQ(kv.children[0].name, "key");
+    EXPECT_EQ(kv.children[0].local_id, 0);
+    EXPECT_EQ(kv.children[1].name, "value");
+    EXPECT_EQ(kv.children[1].local_id, 1);
+    ASSERT_EQ(kv.children[1].children.size(), 2);
+    EXPECT_EQ(kv.children[1].children[0].name, "a");
+    EXPECT_EQ(kv.children[1].children[1].name, "b");
+}
+
+// Scenario: Hive text escapes a field separator inside a string. The splitter keeps the escaped
+// separator in the same field, and hive-text serde unescapes the final string value.
+TEST_F(TextV2ReaderTest, EscapedSeparatorStaysInsideStringField) {
+    const std::string text_path = (_test_dir / "escaped.text").string();
+    std::ofstream writer(text_path, std::ios::binary);
+    writer << "1,alice\\,team,10\n"; // bytes on disk: 1,alice\,team,10
+    writer.close();
+
+    auto text_reader = create_reader(text_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(2), LocalIndex(1));
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)),
+                                           LocalColumnIndex::top_level(LocalColumnId(2))};
+    ASSERT_TRUE(text_reader->open(scan_request).ok());
+    // Expected: the escaped comma stays inside name and the escape byte itself is dropped.
+    bool reached_eof = false;
+    size_t row_count = 0;
+    auto out_block = make_block(file_schema, {1, 2});
+    ASSERT_TRUE(text_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_string_at(*out_block.get_by_position(0).column, 0), "alice,team");
+    EXPECT_EQ(nullable_int_at(*out_block.get_by_position(1).column, 0), 10);
+}
+
+// Scenario: Hive text supports multi-character field separators. V2 must not split on partial
+// matches and must still honor FileScanRequest output positions.
+TEST_F(TextV2ReaderTest, MultiCharacterSeparatorReadsRequestedColumns) {
+    const std::string text_path = (_test_dir / "multi.text").string();
+    std::ofstream writer(text_path, std::ios::binary);
+    writer << "3||carol||30\n";
+    writer.close();
+
+    _params.file_attributes.text_params.__set_column_separator("||");
+    auto text_reader = create_reader(text_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan_request = std::make_shared<FileScanRequest>();
+    scan_request->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan_request->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    scan_request->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1)),
+                                           LocalColumnIndex::top_level(LocalColumnId(0))};
+    ASSERT_TRUE(text_reader->open(scan_request).ok());
+    // Output order is swapped on purpose: name goes to position 0 and id to position 1.
+    bool reached_eof = false;
+    size_t row_count = 0;
+    auto out_block = make_block(file_schema, {1, 0});
+    ASSERT_TRUE(text_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_string_at(*out_block.get_by_position(0).column, 0), "carol");
+    EXPECT_EQ(nullable_int_at(*out_block.get_by_position(1).column, 0), 3);
+}
+
+// Scenario: column_idxs can remap table slots onto Hive text field ordinals in any order.
+TEST_F(TextV2ReaderTest, ColumnIdxsMapSlotsToTextOrdinals) {
+    _params.__set_column_idxs({2, 0, 1});
+    const auto path = (_test_dir / "remapped.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "doris,40,4\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+    EXPECT_EQ(file_schema[0].local_id, 2);
+    EXPECT_EQ(file_schema[1].local_id, 0);
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(2), LocalIndex(0));
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2)),
+                                   LocalColumnIndex::top_level(LocalColumnId(0))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {2, 0});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_int_at(*result.get_by_position(0).column, 0), 4);
+    EXPECT_EQ(nullable_string_at(*result.get_by_position(1).column, 0), "doris");
+}
+
+// Scenario: in Hive text a complex value lives inside a single top-level field. V2 materializes
+// the whole struct field and then applies a file-local predicate to one child, which covers
+// `SELECT s.a WHERE s.b > 10` without claiming physical nested-column pruning for Text.
+TEST_F(TextV2ReaderTest, FullStructColumnSupportsChildConjunctFiltering) {
+    _params.file_attributes.text_params.__set_column_separator("|");
+    _params.file_attributes.text_params.__set_collection_delimiter(",");
+    _params.__set_column_idxs({0, 1, 2});
+    const auto path = (_test_dir / "complex.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "1|11,5|10\n";
+    data_file << "2|22,20|20\n";
+    data_file.close();
+
+    auto struct_slots = build_struct_slots(&_pool);
+    auto text_reader = create_reader(path, &_params, struct_slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    scan->conjuncts = {prepared_conjunct(
+            &_state, std::make_shared<StructIntChildGreaterThanExpr>(
+                             /*block_position=*/0, /*child_index=*/1, /*value=*/10))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {1});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_struct_int_child_at(*result.get_by_position(0).column, 0, 0), 22);
+    EXPECT_EQ(nullable_struct_int_child_at(*result.get_by_position(0).column, 1, 0), 20);
+}
+
+// Scenario: a Hive text field absent from the line becomes NULL and never shifts later columns.
+TEST_F(TextV2ReaderTest, MissingRequestedFieldUsesNullFormat) {
+    const auto path = (_test_dir / "missing.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "1,alice\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(2), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(2))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {2});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_TRUE(is_null_at(*result.get_by_position(0).column, 0));
+}
+
+// Scenario: Text v2 accepts a request that materializes no columns. Table-level COUNT-style
+// paths rely on this and still need the number of logical rows that were read.
+TEST_F(TextV2ReaderTest, EmptyFileLocalProjectionStillReportsRows) {
+    auto scan = std::make_shared<FileScanRequest>();
+    auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    Block empty_block;
+    ASSERT_TRUE(text_reader->get_block(&empty_block, &row_count, &at_eof).ok());
+    EXPECT_EQ(row_count, 2);
+    EXPECT_FALSE(at_eof);
+}
+
+// Scenario: stream load/http_stream text input has no backing filesystem. If TableReader
+// drops the stream load id, the v2 reader must report that directly instead of taking the
+// generic FileFactory path and returning "unsupported file reader type: 2".
+TEST_F(TextV2ReaderTest, StreamInputRequiresLoadIdBeforeOpeningPipe) {
+    _params.__set_file_type(TFileType::FILE_STREAM);
+    auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    const auto open_status = text_reader->open(scan);
+
+    ASSERT_FALSE(open_status.ok());
+    EXPECT_NE(open_status.to_string().find("stream reader requires load id"), std::string::npos)
+            << open_status;
+}
+
+// Scenario: Hive-text serde honors an explicit text null_format. In contrast to CSV
+// empty_field_as_null, an empty field is NULL only when it matches null_format exactly.
+TEST_F(TextV2ReaderTest, NullFormatProducesNullableValue) {
+    _params.file_attributes.text_params.__set_null_format("NULL");
+    const auto path = (_test_dir / "null_format.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "1,NULL,10\n";
+    data_file << "2,,20\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {1});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 2);
+    EXPECT_TRUE(is_null_at(*result.get_by_position(0).column, 0));
+    EXPECT_FALSE(is_null_at(*result.get_by_position(0).column, 1));
+    EXPECT_EQ(nullable_string_at(*result.get_by_position(0).column, 1), "");
+}
+
+// Scenario: a Hive SerDe may declare the empty string itself to be NULL. The nullable string
+// fast path has to agree with the generic nullable serde and must not read an empty
+// null_format as "null format is not configured".
+TEST_F(TextV2ReaderTest, EmptyNullFormatProducesNullableValue) {
+    _params.file_attributes.text_params.__set_null_format("");
+    const auto path = (_test_dir / "empty_null_format.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "1,alice,10\n";
+    data_file << "2,,20\n";
+    data_file << "3,NULL,30\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(1))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {1});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 3);
+    EXPECT_FALSE(is_null_at(*result.get_by_position(0).column, 0));
+    EXPECT_TRUE(is_null_at(*result.get_by_position(0).column, 1));
+    EXPECT_FALSE(is_null_at(*result.get_by_position(0).column, 2));
+    EXPECT_EQ(nullable_string_at(*result.get_by_position(0).column, 2), "NULL");
+}
+
+// Scenario: headers in the TEXT_WITH_NAMES_AND_TYPES style go through the delimited text base
+// skip path shared with CSV, so both header records are skipped before the first data row.
+TEST_F(TextV2ReaderTest, HeaderNamesAndTypesSkipsTwoLines) {
+    _params.file_attributes.__set_header_type(BeConsts::CSV_WITH_NAMES_AND_TYPES);
+    const auto path = (_test_dir / "header_names_types.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "id,name,score\n";
+    data_file << "INT,STRING,INT\n";
+    data_file << "7,carol,70\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {0});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_int_at(*result.get_by_position(0).column, 0), 7);
+}
+
+// Scenario: the shared delimited text base strips a UTF-8 BOM from the first data line it
+// returns, which matters for headerless text files with a numeric first column.
+TEST_F(TextV2ReaderTest, BomIsRemovedFromFirstDataLineWithoutHeader) {
+    const auto path = (_test_dir / "bom_data.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file.write("\xEF\xBB\xBF", 3);
+    data_file << "5,bom,50\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {0});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_int_at(*result.get_by_position(0).column, 0), 5);
+}
+
+// Scenario: if FE leaves header_type unset, the shared delimited text base has to honor
+// skip_lines before TextReader begins splitting rows.
+TEST_F(TextV2ReaderTest, SkipLinesUsedWhenHeaderTypeUnset) {
+    _params.file_attributes.__isset.header_type = false;
+    _params.file_attributes.__set_skip_lines(2);
+    const auto path = (_test_dir / "skip_lines.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "skip me\n";
+    data_file << "skip me too\n";
+    data_file << "3,dan,30\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {0});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_int_at(*result.get_by_position(0).column, 0), 3);
+}
+
+// Scenario: Hive TEXTFILE counts an empty physical line as a record. The first field is
+// deserialized from an empty value and the missing trailing fields take null_format.
+TEST_F(TextV2ReaderTest, EmptyLineAsRecordByDefault) {
+    const auto path = (_test_dir / "empty_line.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "\n";
+    data_file << "4,erin,40\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan->local_positions.emplace(LocalColumnId(1), LocalIndex(1));
+    scan->local_positions.emplace(LocalColumnId(2), LocalIndex(2));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                   LocalColumnIndex::top_level(LocalColumnId(1)),
+                                   LocalColumnIndex::top_level(LocalColumnId(2))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {0, 1, 2});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 2);
+    EXPECT_TRUE(is_null_at(*result.get_by_position(0).column, 0));
+    EXPECT_TRUE(is_null_at(*result.get_by_position(1).column, 0));
+    EXPECT_TRUE(is_null_at(*result.get_by_position(2).column, 0));
+    EXPECT_EQ(nullable_int_at(*result.get_by_position(0).column, 1), 4);
+    EXPECT_EQ(nullable_string_at(*result.get_by_position(1).column, 1), "erin");
+    EXPECT_EQ(nullable_int_at(*result.get_by_position(2).column, 1), 40);
+}
+
+// Scenario: in a single-column Hive TEXTFILE table an empty physical line yields one empty
+// string field; the row is not skipped.
+TEST_F(TextV2ReaderTest, EmptyLineAsSingleEmptyStringField) {
+    _params.__set_column_idxs({0});
+    const auto path = (_test_dir / "empty_line_single_string.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "\n";
+    data_file << "erin\n";
+    data_file.close();
+
+    const std::vector<SlotDescriptor*> string_slots {make_test_slot(
+            &_pool, 0, 0, make_nullable(std::make_shared<DataTypeString>()), "value")};
+    auto text_reader = create_reader(path, &_params, string_slots, &_state, &_profile);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {0});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 2);
+    EXPECT_FALSE(is_null_at(*result.get_by_position(0).column, 0));
+    EXPECT_EQ(nullable_string_at(*result.get_by_position(0).column, 0), "");
+    EXPECT_EQ(nullable_string_at(*result.get_by_position(0).column, 1), "erin");
+}
+
+// Scenario: COUNT pushdown in text v2 treats empty physical lines as Hive TEXTFILE records.
+TEST_F(TextV2ReaderTest, CountAggregatePreservesEmptyLines) {
+    const auto path = (_test_dir / "empty_line_count.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "\n";
+    data_file << "4,erin,40\n";
+    data_file.close();
+
+    auto scan = std::make_shared<FileScanRequest>();
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile);
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    FileAggregateResult count_result;
+    FileAggregateRequest count_request;
+    count_request.agg_type = TPushAggOp::type::COUNT;
+    ASSERT_TRUE(text_reader->get_aggregate_result(count_request, &count_result).ok());
+    EXPECT_EQ(count_result.count, 2);
+}
+
+// Scenario: text files expose no row-count metadata, so Text v2 COUNT pushdown scans the rows.
+TEST_F(TextV2ReaderTest, CountAggregateScansRows) {
+    auto scan = std::make_shared<FileScanRequest>();
+    auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    FileAggregateResult count_result;
+    FileAggregateRequest count_request;
+    count_request.agg_type = TPushAggOp::type::COUNT;
+    ASSERT_TRUE(text_reader->get_aggregate_result(count_request, &count_result).ok());
+    EXPECT_EQ(count_result.count, 2);
+}
+
+// Scenario: a non-first split starts mid-record and has to drop the partial first line.
+TEST_F(TextV2ReaderTest, NonFirstSplitSkipsPartialFirstRecord) {
+    const auto path = (_test_dir / "split.text").string();
+    std::ofstream data_file(path, std::ios::binary);
+    data_file << "1,skip,10\n";
+    data_file << "2,bob,20\n";
+    data_file.close();
+
+    auto text_reader = create_reader(path, &_params, _slots, &_state, &_profile,
+                                     /*range_start_offset=*/3);
+    std::vector<ColumnDefinition> file_schema;
+    ASSERT_TRUE(text_reader->get_schema(&file_schema).ok());
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    bool at_eof = false;
+    size_t row_count = 0;
+    auto result = make_block(file_schema, {0});
+    ASSERT_TRUE(text_reader->get_block(&result, &row_count, &at_eof).ok());
+    ASSERT_EQ(row_count, 1);
+    EXPECT_EQ(nullable_int_at(*result.get_by_position(0).column, 0), 2);
+}
+
+// Scenario: a compressed text file cannot be split at an arbitrary byte offset, since the
+// decompressor needs the stream from its start. V2 should reject such a split before the line
+// reader is constructed.
+TEST_F(TextV2ReaderTest, NonFirstCompressedSplitReturnsError) {
+    _params.__set_compress_type(TFileCompressType::GZ);
+    auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile,
+                                     /*range_start_offset=*/1);
+
+    auto scan = std::make_shared<FileScanRequest>();
+    scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+    EXPECT_FALSE(text_reader->open(scan).ok());
+}
+
+// Scenario: FileScanRequest is the contract between TableReader and FileReader. An unknown TEXT
+// ordinal, an out-of-range block position, or a sparse block-position map must fail at open().
+TEST_F(TextV2ReaderTest, InvalidScanRequestReturnsError) {
+    {
+        auto scan = std::make_shared<FileScanRequest>();
+        auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        scan->local_positions.emplace(LocalColumnId(99), LocalIndex(0));
+        scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(99))};
+        EXPECT_FALSE(text_reader->open(scan).ok());
+    }
+    {
+        auto scan = std::make_shared<FileScanRequest>();
+        auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        scan->local_positions.emplace(LocalColumnId(0), LocalIndex(2));
+        scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0))};
+        EXPECT_FALSE(text_reader->open(scan).ok());
+    }
+    {
+        auto scan = std::make_shared<FileScanRequest>();
+        auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+        scan->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+        scan->local_positions.emplace(LocalColumnId(1), LocalIndex(0));
+        scan->non_predicate_columns = {LocalColumnIndex::top_level(LocalColumnId(0)),
+                                       LocalColumnIndex::top_level(LocalColumnId(1))};
+        EXPECT_FALSE(text_reader->open(scan).ok());
+    }
+}
+
+// Scenario: an aggregate request the reader does not support must fail explicitly rather than
+// return partial results from the scan path.
+TEST_F(TextV2ReaderTest, UnsupportedAggregateReturnsNotSupported) {
+    auto scan = std::make_shared<FileScanRequest>();
+    auto text_reader = create_reader(_file_path, &_params, _slots, &_state, &_profile);
+    ASSERT_TRUE(text_reader->open(scan).ok());
+
+    FileAggregateResult minmax_result;
+    FileAggregateRequest minmax_request;
+    minmax_request.agg_type = TPushAggOp::type::MINMAX;
+    EXPECT_FALSE(text_reader->get_aggregate_result(minmax_request, &minmax_result).ok());
+}
+
+} // namespace
+} // namespace doris::format::text
diff --git a/be/test/format_v2/expr/cast_test.cpp b/be/test/format_v2/expr/cast_test.cpp
new file mode 100644
index 00000000000000..341b89433f0c08
--- /dev/null
+++ b/be/test/format_v2/expr/cast_test.cpp
@@ -0,0 +1,172 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/cast.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "runtime/descriptors.h"
+#include "testutil/column_helper.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format {
+
+class CastTest : public testing::Test {
+protected:
+    void SetUp() override { state.set_enable_strict_cast(true); }
+    // Builds a context for `Cast(return_type)` over a single VSlotRef child of `child_type`.
+    static VExprContextSPtr create_context(const DataTypePtr& return_type,
+                                           const DataTypePtr& child_type, int child_column_id = 0) {
+        auto cast_expr = Cast::create_shared(return_type);
+        cast_expr->add_child(VSlotRef::create_shared(child_column_id, child_column_id, -1,
+                                                     child_type, "source_column"));
+        return VExprContext::create_shared(cast_expr);
+    }
+    // Runs prepare, open and execute in that order against the fixture's runtime state.
+    Status prepare_open_execute(VExprContext* context, Block* block, int* result_column_id) {
+        RETURN_IF_ERROR(context->prepare(&state, RowDescriptor()));
+        RETURN_IF_ERROR(context->open(&state));
+        return context->execute(block, result_column_id);
+    }
+    // Runtime state handed to every expression; SetUp() turns strict cast on.
+    MockRuntimeState state;
+};
+
+// Casting an INT slot to BIGINT appends the result as a second column with the same values.
+TEST_F(CastTest, CastIntSlotToBigInt) {
+    auto return_type = std::make_shared<DataTypeInt64>();
+    auto context = create_context(return_type, std::make_shared<DataTypeInt32>());
+    Block input;
+    input.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, -2, 3}));
+
+    int result_idx = -1;
+    auto status = prepare_open_execute(context.get(), &input, &result_idx);
+    ASSERT_TRUE(status.ok()) << status;
+
+    ASSERT_EQ(result_idx, 1);
+    ASSERT_EQ(input.columns(), 2);
+    const auto& result = input.get_by_position(result_idx);
+    EXPECT_EQ(result.type, return_type);
+    const auto& values = assert_cast<const ColumnInt64&>(*result.column).get_data();
+    EXPECT_EQ(values[0], 1);
+    EXPECT_EQ(values[1], -2);
+    EXPECT_EQ(values[2], 3);
+
+    context->close();
+}
+
+// With strict cast disabled, an unparsable string becomes NULL while valid rows are converted.
+TEST_F(CastTest, CastStringSlotToNullableInt) {
+    state.set_enable_strict_cast(false);
+    auto return_type = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt32>());
+    auto context = create_context(return_type, std::make_shared<DataTypeString>());
+    Block input;
+    input.insert(ColumnHelper::create_column_with_name<DataTypeString>({"10", "bad", "-3"}));
+
+    int result_idx = -1;
+    auto status = prepare_open_execute(context.get(), &input, &result_idx);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& nullable_result =
+            assert_cast<const ColumnNullable&>(*input.get_by_position(result_idx).column);
+    const auto& values =
+            assert_cast<const ColumnInt32&>(nullable_result.get_nested_column()).get_data();
+    const auto& null_flags = nullable_result.get_null_map_data();
+    EXPECT_EQ(values[0], 10);
+    EXPECT_EQ(values[2], -3);
+    EXPECT_EQ(null_flags[0], 0);
+    EXPECT_EQ(null_flags[1], 1);
+    EXPECT_EQ(null_flags[2], 0);
+
+    context->close();
+}
+
+// Casting the INT literal 123 to STRING yields "123" for every row of the input block.
+TEST_F(CastTest, CastLiteralToString) {
+    auto literal_type = std::make_shared<DataTypeInt32>();
+    auto cast_expr = Cast::create_shared(std::make_shared<DataTypeString>());
+    cast_expr->add_child(VLiteral::create_shared(literal_type, Field::create_field<TYPE_INT>(123)));
+    auto context = VExprContext::create_shared(cast_expr);
+    Block input;
+    input.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({1, 2, 3}));
+
+    int result_idx = -1;
+    auto status = prepare_open_execute(context.get(), &input, &result_idx);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& casted = input.get_by_position(result_idx);
+    EXPECT_EQ(casted.type->to_string(*casted.column, 0), "123");
+    EXPECT_EQ(casted.type->to_string(*casted.column, 1), "123");
+    EXPECT_EQ(casted.type->to_string(*casted.column, 2), "123");
+
+    context->close();
+}
+
+// Executing the cast on a zero-row block still appends a result column, which is empty.
+TEST_F(CastTest, EmptyBlockAppendsEmptyResultColumn) {
+    auto return_type = std::make_shared<DataTypeInt64>();
+    auto context = create_context(return_type, std::make_shared<DataTypeInt32>());
+    Block input;
+    input.insert(ColumnHelper::create_column_with_name<DataTypeInt32>({}));
+
+    int result_idx = -1;
+    auto status = prepare_open_execute(context.get(), &input, &result_idx);
+    ASSERT_TRUE(status.ok()) << status;
+
+    ASSERT_EQ(result_idx, 1);
+    EXPECT_EQ(input.get_by_position(result_idx).column->size(), 0);
+
+    context->close();
+}
+
+// Preparing a Cast that has no child must fail and mention the single-child requirement.
+TEST_F(CastTest, PrepareRejectsMissingChild) {
+    VExprContext context(Cast::create_shared(std::make_shared<DataTypeInt64>()));
+
+    auto prepare_status = context.prepare(&state, RowDescriptor());
+    ASSERT_FALSE(prepare_status.ok());
+    EXPECT_NE(prepare_status.to_string().find("exactly 1 child expr"), std::string::npos);
+}
+
+TEST_F(CastTest, PrepareRejectsMultipleChildren) {
+    auto slot_type = std::make_shared<DataTypeInt32>();
+    auto cast_expr = Cast::create_shared(std::make_shared<DataTypeInt64>());
+    cast_expr->add_child(VSlotRef::create_shared(0, 0, -1, slot_type, "c0"));
+    cast_expr->add_child(VSlotRef::create_shared(1, 1, -1, slot_type, "c1"));  // surplus child
+    VExprContext ctx(cast_expr);
+
+    auto prepare_status = ctx.prepare(&state, RowDescriptor());
+    ASSERT_FALSE(prepare_status.ok());
+    EXPECT_NE(prepare_status.to_string().find("exactly 1 child expr"), std::string::npos);
+}
+
+} // namespace doris::format
diff --git a/be/test/format_v2/expr/delete_predicate_test.cpp b/be/test/format_v2/expr/delete_predicate_test.cpp
new file mode 100644
index 00000000000000..264a9fdf9b19f5
--- /dev/null
+++ b/be/test/format_v2/expr/delete_predicate_test.cpp
@@ -0,0 +1,168 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/delete_predicate.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/block/block.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "exprs/vexpr_context.h"
+#include "runtime/descriptors.h"
+#include "testutil/mock/mock_slot_ref.h"
+
+namespace doris::format {
+
+class DeletePredicateTest : public testing::Test {
+protected:
+    static Block make_block(const std::vector<int64_t>& row_ids) {
+        auto column = ColumnInt64::create();
+        for (auto row_id : row_ids) {
+            column->insert_value(row_id);
+        }
+
+        Block block;
+        block.insert({std::move(column), std::make_shared<DataTypeInt64>(), "row_id"});
+        return block;
+    }
+    // Copies the ColumnBool result at result_column_id into a plain vector for comparison.
+    static std::vector<UInt8> result_column_data(const Block& block, int result_column_id) {
+        const auto& result_column =
+                assert_cast<const ColumnBool&>(*block.get_by_position(result_column_id).column);
+        return {result_column.get_data().begin(), result_column.get_data().end()};
+    }
+    // Builds a DeletePredicate with one Int64 MockSlotRef child (id 0) and executes it on block.
+    static Status execute_delete_predicate(const std::vector<int64_t>& deleted_rows, Block* block,
+                                           int* result_column_id) {
+        auto delete_predicate = std::make_shared<DeletePredicate>(deleted_rows);
+        delete_predicate->_open_finished = true;
+        delete_predicate->add_child(
+                std::make_shared<MockSlotRef>(0, std::make_shared<DataTypeInt64>()));
+        // execute() is called on the expr directly; the context is never prepared or opened.
+        VExprContext context(delete_predicate);
+        return delete_predicate->execute(&context, block, result_column_id);
+    }
+};
+
+TEST_F(DeletePredicateTest, MatchDeletedRowsInInputRange) {
+    const std::vector<int64_t> deleted_rows {-3, 1, 4, 8, 12, 20};
+    auto block = make_block({0, 1, 2, 3, 4, 5, 8, 12});
+    // Row ids 1, 4, 8 and 12 are deleted; -3 and 20 never appear in the input.
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    // One flag per input row, appended as column 1; 1 marks a deleted row.
+    EXPECT_EQ(result_column_id, 1);
+    EXPECT_EQ(result_column_data(block, result_column_id),
+              std::vector<UInt8>({0, 1, 0, 0, 1, 0, 1, 1}));
+}
+
+TEST_F(DeletePredicateTest, EmptyDeletedRowsReturnAllFalse) {
+    const std::vector<int64_t> deleted_rows;
+    auto block = make_block({1, 2, 3});
+    // With no deleted rows, no input row can be flagged.
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    EXPECT_EQ(result_column_data(block, result_column_id), std::vector<UInt8>({0, 0, 0}));
+}
+
+TEST_F(DeletePredicateTest, DeletedRowsOutsideInputRangeReturnAllFalse) {
+    const std::vector<int64_t> deleted_rows {-10, -1, 10, 11};
+    auto block = make_block({1, 2, 3});
+    // Every deleted row id lies below or above the input ids 1..3.
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+
+    EXPECT_EQ(result_column_data(block, result_column_id), std::vector<UInt8>({0, 0, 0}));
+}
+
+TEST_F(DeletePredicateTest, EmptyRowIdColumnAppendsEmptyResultColumn) {
+    const std::vector<int64_t> deleted_rows {1, 2, 3};
+    auto block = make_block({});
+    // The row-id column exists but has zero rows.
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    // A zero-row result column is still appended at position 1.
+    EXPECT_EQ(block.columns(), 2);
+    EXPECT_EQ(result_column_id, 1);
+    EXPECT_EQ(result_column_data(block, result_column_id), std::vector<UInt8>({}));
+}
+
+TEST_F(DeletePredicateTest, MissingRowIdColumnReturnsError) {
+    const std::vector<int64_t> deleted_rows {1, 2, 3};
+    Block block;
+    // The block has no columns at all, so execution must fail with "invalid column id".
+    int result_column_id = -1;
+    auto status = execute_delete_predicate(deleted_rows, &block, &result_column_id);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("invalid column id"), std::string::npos);
+    EXPECT_EQ(block.columns(), 0);  // no result column was appended
+    EXPECT_EQ(result_column_id, -1);  // output id is left untouched on failure
+}
+
+TEST_F(DeletePredicateTest, MissingRowIdChildReturnsError) {
+    const std::vector<int64_t> deleted_rows {1};
+    auto block = make_block({1});
+    auto delete_predicate = std::make_shared<DeletePredicate>(deleted_rows);
+    delete_predicate->_open_finished = true;
+    VExprContext context(delete_predicate);
+    // No child expr was added, so execute() must reject the predicate.
+    int result_column_id = -1;
+    auto status = delete_predicate->execute(&context, &block, &result_column_id);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("exactly 1 child expr"), std::string::npos);
+}
+
+TEST_F(DeletePredicateTest, ExecuteColumnImplReturnsError) {
+    const std::vector<int64_t> deleted_rows {1};
+    DeletePredicate delete_predicate(deleted_rows);
+    VExprContext context(std::make_shared<DeletePredicate>(deleted_rows));
+    ColumnPtr result_column;
+    // execute_column_impl() is expected to fail with a message that names the method.
+    auto status =
+            delete_predicate.execute_column_impl(&context, nullptr, nullptr, 0, result_column);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("DeletePredicate::execute_column_impl"), std::string::npos);
+}
+
+TEST_F(DeletePredicateTest, LifecycleAndDebugString) {
+    const std::vector<int64_t> deleted_rows {1};
+    DeletePredicate delete_predicate(deleted_rows);
+    VExprContext context(std::make_shared<DeletePredicate>(deleted_rows));
+    RowDescriptor row_desc;
+    // prepare() and open() are called with a null RuntimeState and must both succeed.
+    auto status = delete_predicate.prepare(nullptr, row_desc, &context);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(delete_predicate.expr_name(), "DeletePredicate");
+    EXPECT_EQ(delete_predicate.debug_string(), "DeletePredicate");
+
+    status = delete_predicate.open(nullptr, &context, FunctionContext::THREAD_LOCAL);
+    ASSERT_TRUE(status.ok()) << status;
+    delete_predicate.close(&context, FunctionContext::THREAD_LOCAL);
+}
+
+} // namespace doris::format
diff --git a/be/test/format_v2/expr/equality_delete_predicate_test.cpp b/be/test/format_v2/expr/equality_delete_predicate_test.cpp
new file mode 100644
index 00000000000000..886a86713fe8da
--- /dev/null
+++ b/be/test/format_v2/expr/equality_delete_predicate_test.cpp
@@ -0,0 +1,181 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/expr/equality_delete_predicate.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "common/status.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/expr/cast.h"
+#include "runtime/descriptors.h"
+#include "testutil/column_helper.h"
+#include "testutil/mock/mock_runtime_state.h"
+#include "testutil/mock/mock_slot_ref.h"
+
+namespace doris::format {
+
+class EqualityDeletePredicateTest : public testing::Test {
+protected:
+    static ColumnWithTypeAndName make_nullable_int_column(
+            const std::string& name, const std::vector<std::optional<int>>& values) {
+        auto data = ColumnInt32::create();
+        auto null_map = ColumnUInt8::create();
+        for (const auto& value : values) {
+            data->insert_value(value.value_or(0));  // std::nullopt rows store 0 as payload
+            null_map->insert_value(!value.has_value());
+        }
+        auto type = make_nullable(std::make_shared<DataTypeInt32>());
+        return {ColumnNullable::create(std::move(data), std::move(null_map)), type, name};
+    }
+    // String counterpart of make_nullable_int_column; std::nullopt rows store "" as payload.
+    static ColumnWithTypeAndName make_nullable_string_column(
+            const std::string& name, const std::vector<std::optional<std::string>>& values) {
+        auto data = ColumnString::create();
+        auto null_map = ColumnUInt8::create();
+        for (const auto& value : values) {
+            const std::string data_value = value.value_or("");
+            data->insert_data(data_value.data(), data_value.size());
+            null_map->insert_value(!value.has_value());
+        }
+        auto type = make_nullable(std::make_shared<DataTypeString>());
+        return {ColumnNullable::create(std::move(data), std::move(null_map)), type, name};
+    }
+    // Copies the ColumnBool result at result_column_id into a plain vector for comparison.
+    static std::vector<UInt8> result_column_data(const Block& block, int result_column_id) {
+        const auto& result_column =
+                assert_cast<const ColumnBool&>(*block.get_by_position(result_column_id).column);
+        return {result_column.get_data().begin(), result_column.get_data().end()};
+    }
+    // Adds one MockSlotRef child per field id, typed after data_block column idx, then executes.
+    static Status execute_equality_delete_predicate(Block delete_block, std::vector<int> field_ids,
+                                                    Block* data_block, int* result_column_id) {
+        auto predicate =
+                std::make_shared<EqualityDeletePredicate>(std::move(delete_block), field_ids);
+        predicate->_open_finished = true;
+        for (size_t idx = 0; idx < field_ids.size(); ++idx) {
+            predicate->add_child(
+                    std::make_shared<MockSlotRef>(idx, data_block->get_by_position(idx).type));
+        }
+
+        VExprContext context(predicate);
+        return predicate->execute(&context, data_block, result_column_id);
+    }
+    // Runs prepare -> open -> execute through the context, stopping at the first error.
+    static Status execute_prepared_equality_delete_predicate(const VExprContextSPtr& context,
+                                                             MockRuntimeState* state,
+                                                             Block* data_block,
+                                                             int* result_column_id) {
+        RETURN_IF_ERROR(context->prepare(state, RowDescriptor()));
+        RETURN_IF_ERROR(context->open(state));
+        return context->execute(data_block, result_column_id);
+    }
+};
+
+TEST_F(EqualityDeletePredicateTest, MatchSingleColumn) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {1, 4}));
+    Block data_block;
+    data_block.insert(make_nullable_int_column("id", {1, 2, 3, 4}));
+    // Data rows with id 1 and 4 appear in the delete block and must be flagged.
+    int result_column_id = -1;
+    auto status = execute_equality_delete_predicate(std::move(delete_block), {1}, &data_block,
+                                                    &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector<UInt8>({1, 0, 0, 1}));
+}
+
+TEST_F(EqualityDeletePredicateTest, MatchMultipleColumns) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {1, 2}));
+    delete_block.insert(make_nullable_string_column("name", {"a", "b"}));
+    Block data_block;
+    data_block.insert(make_nullable_int_column("id", {1, 1, 2, 2}));
+    data_block.insert(make_nullable_string_column("name", {"a", "b", "a", "b"}));
+    // Only rows equal to a delete row on both columns, (1, "a") and (2, "b"), are flagged.
+    int result_column_id = -1;
+    auto status = execute_equality_delete_predicate(std::move(delete_block), {1, 2}, &data_block,
+                                                    &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector<UInt8>({1, 0, 0, 1}));
+}
+
+TEST_F(EqualityDeletePredicateTest, MatchNullValues) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {std::nullopt}));
+    Block data_block;
+    data_block.insert(make_nullable_int_column("id", {1, std::nullopt, 3}));
+    // A NULL delete key flags the NULL data row and nothing else.
+    int result_column_id = -1;
+    auto status = execute_equality_delete_predicate(std::move(delete_block), {1}, &data_block,
+                                                    &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector<UInt8>({0, 1, 0}));
+}
+
+TEST_F(EqualityDeletePredicateTest, MatchAfterCastToDeleteKeyType) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {1, 4}));
+    Block data_block;
+    data_block.insert(ColumnHelper::create_column_with_name<DataTypeInt64>({1, 2, 4}));
+    // The predicate's child casts the Int64 data column to nullable Int32, the delete key type.
+    auto predicate = std::make_shared<EqualityDeletePredicate>(std::move(delete_block),
+                                                               std::vector<int> {1});
+    auto cast_expr = Cast::create_shared(make_nullable(std::make_shared<DataTypeInt32>()));
+    cast_expr->add_child(std::make_shared<MockSlotRef>(0, data_block.get_by_position(0).type));
+    predicate->add_child(std::move(cast_expr));
+    auto context = VExprContext::create_shared(predicate);
+    MockRuntimeState state;
+
+    int result_column_id = -1;
+    auto status = execute_prepared_equality_delete_predicate(context, &state, &data_block,
+                                                             &result_column_id);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(result_column_data(data_block, result_column_id), std::vector<UInt8>({1, 0, 1}));
+    context->close();
+}
+
+TEST_F(EqualityDeletePredicateTest, ChildCountMismatchReturnsError) {
+    Block delete_block;
+    delete_block.insert(make_nullable_int_column("id", {1}));
+    auto predicate = std::make_shared<EqualityDeletePredicate>(std::move(delete_block),
+                                                               std::vector<int> {1});
+    predicate->_open_finished = true;
+    Block data_block;
+    data_block.insert(make_nullable_int_column("id", {1}));
+    VExprContext context(predicate);
+    // One field id was declared but no child expr was added, so execute() must fail.
+    int result_column_id = -1;
+    auto status = predicate->execute(&context, &data_block, &result_column_id);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("should have 1 child exprs"), std::string::npos);
+}
+
+} // namespace doris::format
diff --git a/be/test/format_v2/json/json_reader_test.cpp b/be/test/format_v2/json/json_reader_test.cpp
new file mode 100644
index 00000000000000..31c77501ce67c6
--- /dev/null
+++ b/be/test/format_v2/json/json_reader_test.cpp
@@ -0,0 +1,608 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/json/json_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/object_pool.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/column_data.h"
+#include "io/io_common.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_profile.h"
+#include "testutil/mock/mock_runtime_state.h"
+
+namespace doris::format::json {
+namespace {
+
+TFileScanRangeParams json_scan_params(bool read_json_by_line = true, bool strip_outer_array = false,
+                                      std::string jsonpaths = "", std::string json_root = "",
+                                      bool ignore_malformed = false) {
+    TFileScanRangeParams params;  // scan params for a local, uncompressed JSON file
+    params.__set_format_type(TFileFormatType::FORMAT_JSON);
+    params.__set_file_type(TFileType::FILE_LOCAL);
+    params.__set_compress_type(TFileCompressType::PLAIN);
+    TFileAttributes attributes;
+    TFileTextScanRangeParams text_params;
+    text_params.__set_line_delimiter("\n");
+    attributes.__set_text_params(std::move(text_params));
+    attributes.__set_read_json_by_line(read_json_by_line);
+    attributes.__set_strip_outer_array(strip_outer_array);
+    attributes.__set_num_as_string(false);
+    attributes.__set_fuzzy_parse(false);
+    if (!jsonpaths.empty()) {  // optional attributes stay unset unless a value is given
+        attributes.__set_jsonpaths(std::move(jsonpaths));
+    }
+    if (!json_root.empty()) {
+        attributes.__set_json_root(std::move(json_root));
+    }
+    if (ignore_malformed) {
+        attributes.__set_openx_json_ignore_malformed(true);
+    }
+    params.__set_file_attributes(std::move(attributes));
+    return params;
+}
+
+SlotDescriptor* make_test_slot(ObjectPool* pool, int slot_id, int slot_idx, DataTypePtr type,
+                               const std::string& name) {
+    TSlotDescriptor slot_desc;  // slot_idx is used as both column position and slot index
+    slot_desc.__set_id(slot_id);
+    slot_desc.__set_parent(0);
+    slot_desc.__set_slotType(type->to_thrift());
+    slot_desc.__set_columnPos(slot_idx);
+    slot_desc.__set_byteOffset(0);
+    if (type->is_nullable()) {
+        slot_desc.__set_nullIndicatorByte(slot_idx / 8);
+        slot_desc.__set_nullIndicatorBit(slot_idx % 8);
+    } else {  // non-nullable slot
+        slot_desc.__set_nullIndicatorByte(0);
+        slot_desc.__set_nullIndicatorBit(-1);
+    }
+    slot_desc.__set_slotIdx(slot_idx);
+    slot_desc.__set_isMaterialized(true);
+    slot_desc.__set_colName(name);
+    return pool->add(new SlotDescriptor(slot_desc));
+}
+
+std::vector<SlotDescriptor*> build_slots(ObjectPool* pool) {
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, make_nullable(std::make_shared<DataTypeString>()), "name")};
+}
+
+std::vector<SlotDescriptor*> build_slots_with_required_name(ObjectPool* pool) {
+    return {make_test_slot(pool, 0, 0, make_nullable(std::make_shared<DataTypeInt32>()), "id"),
+            make_test_slot(pool, 1, 1, std::make_shared<DataTypeString>(), "name")};
+}
+
+std::vector<SlotDescriptor*> build_complex_slots(ObjectPool* pool) {
+    auto varchar_type = make_nullable(std::make_shared<DataTypeString>(8, TYPE_VARCHAR));
+    auto array_type = make_nullable(
+            std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeInt32>())));
+    auto map_type = make_nullable(std::make_shared<DataTypeMap>(
+            std::make_shared<DataTypeString>(4, TYPE_CHAR),
+            make_nullable(std::make_shared<DataTypeString>(16, TYPE_VARCHAR))));
+    auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {std::make_shared<DataTypeString>(8, TYPE_VARCHAR),
+                       make_nullable(std::make_shared<DataTypeArray>(
+                               make_nullable(std::make_shared<DataTypeInt32>())))},
+            Strings {"name", "scores"}));
+    return {make_test_slot(pool, 0, 0, varchar_type, "nickname"),
+            make_test_slot(pool, 1, 1, array_type, "tags"),
+            make_test_slot(pool, 2, 2, map_type, "props"),
+            make_test_slot(pool, 3, 3, struct_type, "profile")};
+}
+
+std::unique_ptr<io::FileDescription> file_description(const std::string& path) {
+    auto desc = std::make_unique<io::FileDescription>();
+    desc->path = path;
+    desc->file_size = static_cast<int64_t>(std::filesystem::file_size(path));
+    desc->range_start_offset = 0;
+    desc->range_size = desc->file_size;  // the range starts at 0 and spans the whole file
+    return desc;
+}
+
+std::filesystem::path write_json_file(const std::string& name, const std::string& content) {
+    const auto test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_json_reader";
+    std::filesystem::create_directories(test_dir);
+    const auto file_path = test_dir / name;
+    std::ofstream out(file_path);  // opened with the default mode, which truncates the file
+    out << content;  // flushed and closed when `out` is destroyed on return
+    return file_path;
+}
+
+TFileRangeDesc file_range(const std::filesystem::path& file_path) {
+    TFileRangeDesc range;
+    range.__set_path(file_path.string());
+    range.__set_start_offset(0);  // size == file_size below, so the range is the whole file
+    range.__set_size(static_cast<int64_t>(std::filesystem::file_size(file_path)));
+    range.__set_file_size(static_cast<int64_t>(std::filesystem::file_size(file_path)));
+    return range;
+}
+
+Block make_block(const std::vector<ColumnDefinition>& schema,
+                 const std::vector<int32_t>& local_ids) {
+    Block block;  // gets one empty column per requested local id, in request order
+    for (const auto local_id : local_ids) {
+        const auto it = std::ranges::find_if(
+                schema, [&](const auto& column) { return column.local_id == local_id; });
+        DORIS_CHECK(it != schema.end());  // EXPECT_* is non-fatal and would dereference end()
+        block.insert({it->type->create_column(), it->type, it->name});
+    }
+    return block;
+}
+
+struct ReadResult {
+    Status status;  // first failing step in read_once(), else the first get_block() status
+    Status second_status = Status::OK();  // status of the optional second get_block()
+    Block block;  // filled by the first get_block()
+    size_t rows = 0;
+    bool eof = false;
+    size_t second_rows = 0;
+    bool second_eof = false;
+    std::vector<ColumnDefinition> schema;  // as returned by get_schema()
+};
+
+ReadResult read_once(const std::string& file_name, const std::string& content,
+                     TFileScanRangeParams params, const std::vector<SlotDescriptor*>& slots,
+                     const std::vector<int32_t>& requested_local_ids, bool read_twice = false) {
+    const auto file_path = write_json_file(file_name, content);
+    auto range = file_range(file_path);
+    // The freshly written file is read back from the local file system through JsonReader.
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto desc = file_description(file_path.string());
+    RuntimeProfile profile("json_v2_reader_test");
+    MockRuntimeState state;
+    JsonReader reader(system_properties, desc, nullptr, &profile, &params, range, slots);
+    // Stop at the first failing step; the caller inspects result.status.
+    ReadResult result;
+    result.status = reader.init(&state);
+    if (!result.status.ok()) {
+        return result;
+    }
+    result.status = reader.get_schema(&result.schema);
+    if (!result.status.ok()) {
+        return result;
+    }
+    // Local column id requested_local_ids[i] is mapped to local index i.
+    auto request = std::make_shared<FileScanRequest>();
+    for (size_t i = 0; i < requested_local_ids.size(); ++i) {
+        request->local_positions.emplace(LocalColumnId(requested_local_ids[i]), LocalIndex(i));
+    }
+    result.status = reader.open(request);
+    if (!result.status.ok()) {
+        return result;
+    }
+    // The optional second read uses a scratch block; only its status/rows/eof are kept.
+    result.block = make_block(result.schema, requested_local_ids);
+    result.status = reader.get_block(&result.block, &result.rows, &result.eof);
+    if (result.status.ok() && read_twice) {
+        auto eof_block = make_block(result.schema, requested_local_ids);
+        result.second_status =
+                reader.get_block(&eof_block, &result.second_rows, &result.second_eof);
+    }
+    return result;
+}
+
+std::string nullable_string_at(const IColumn& column, size_t row) {
+    // Returns the nested string payload at `row`; the null map is not consulted.
+    const auto& payload = assert_cast<const ColumnNullable&>(column).get_nested_column();
+    return assert_cast<const ColumnString&>(payload).get_data_at(row).to_string();
+}
+
+std::string string_at(const IColumn& column, size_t row) {
+    // Non-nullable variant: the column itself is cast to ColumnString.
+    return assert_cast<const ColumnString&>(column).get_data_at(row).to_string();
+}
+
+int32_t nullable_int_at(const IColumn& column, size_t row) {
+    // Returns the nested Int32 payload at `row`; the null map is not consulted.
+    const auto& payload = assert_cast<const ColumnNullable&>(column).get_nested_column();
+    return assert_cast<const ColumnInt32&>(payload).get_data()[row];
+}
+
+bool nullable_is_null_at(const IColumn& column, size_t row) {
+    // True when the nullable column reports `row` as NULL.
+    return assert_cast<const ColumnNullable&>(column).is_null_at(row);
+}
+
+class NullableIntGreaterThanExpr final : public VExpr {  // test-only "column > value" expr
+public:
+    NullableIntGreaterThanExpr(size_t block_position, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _value(value) {}
+
+    const std::string& expr_name() const override { return _name; }
+
+    bool is_constant() const override { return false; }
+    // Emits 1 for rows whose value is non-null and greater than _value, 0 otherwise.
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& nullable =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& data = assert_cast<const ColumnInt32&>(nullable.get_nested_column());
+        // With a selector, output row i is computed from source row (*selector)[i].
+        auto result = ColumnUInt8::create();
+        auto& result_data = result->get_data();
+        result_data.resize(count);
+        for (size_t row = 0; row < count; ++row) {
+            const auto source_row = selector == nullptr ? row : (*selector)[row];
+            result_data[row] =
+                    !nullable.is_null_at(source_row) && data.get_element(source_row) > _value;
+        }
+        result_column = std::move(result);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<NullableIntGreaterThanExpr>(_block_position, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    int32_t _value;
+    const std::string _name = "NullableIntGreaterThanExpr";
+};
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto context = VExprContext::create_shared(expr);
+    auto status = context->prepare(state, RowDescriptor());
+    EXPECT_TRUE(status.ok()) << status;
+    status = context->open(state);
+    EXPECT_TRUE(status.ok()) << status;
+    return context;  // returned even if prepare/open failed: EXPECT_* is non-fatal
+}
+
+} // namespace
+
+TEST(JsonReaderTest, ReadsRequestedColumnsInFileScanRequestOrder) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("order.jsonl",
+                            R"({"id":1,"name":"alice"})"
+                            "\n"
+                            R"({"id":2,"name":"bob"})"
+                            "\n",
+                            json_scan_params(), slots, {1, 0}, true);
+    // Local id 1 ("name") is requested first, so it is expected at block position 0.
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.schema.size(), 2);
+    EXPECT_EQ(result.schema[0].name, "id");
+    EXPECT_EQ(result.schema[0].local_id, 0);
+    EXPECT_EQ(result.schema[1].name, "name");
+    EXPECT_EQ(result.schema[1].local_id, 1);
+    ASSERT_EQ(result.rows, 2);
+    ASSERT_EQ(result.block.columns(), 2);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 0), "alice");
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 1), "bob");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 0), 1);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 1), 2);
+    ASSERT_TRUE(result.second_status.ok()) << result.second_status.to_string();
+    EXPECT_EQ(result.second_rows, 0);
+    EXPECT_TRUE(result.second_eof);
+}
+
+TEST(JsonReaderTest, ReadsSingleDocumentOuterArray) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result =
+            read_once("outer_array.json", R"([{"id":3,"name":"carol"},{"id":4,"name":"dave"}])",
+                      json_scan_params(false, true), slots, {0, 1});
+    // With strip_outer_array set, each element of the outer array is expected as one row.
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 3);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "carol");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 4);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "dave");
+}
+
+TEST(JsonReaderTest, ReadsJsonRootByLine) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("json_root.jsonl",
+                            R"({"payload":{"id":5,"name":"eve"}})"
+                            "\n"
+                            R"({"payload":{"id":6,"name":"frank"}})"
+                            "\n",
+                            json_scan_params(true, false, "", "$.payload"), slots, {0, 1});
+    // Column values are expected to come from under the "$.payload" root of every line.
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 5);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "eve");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 6);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "frank");
+}
+
+TEST(JsonReaderTest, ReadsJsonPathsBySourceSlotAndReturnsRequestedBlockOrder) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once("jsonpaths.jsonl",
+                            R"({"payload":{"id":7,"user":"grace"}})"
+                            "\n"
+                            R"({"payload":{"id":8,"user":"heidi"}})"
+                            "\n",
+                            json_scan_params(true, false, R"(["$.payload.id","$.payload.user"])"),
+                            slots, {1, 0});
+    // Paths are listed in slot order (id, name); the block follows the requested order {1, 0}.
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 0), "grace");
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(0).column, 1), "heidi");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 0), 7);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(1).column, 1), 8);
+}
+
+TEST(JsonReaderTest, ReadsJsonPathsFromSingleDocumentOuterArray) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    auto result = read_once(
+            "outer_array_jsonpaths.json",
+            R"([{"payload":{"id":12,"user":"kate"}},{"payload":{"id":13,"user":"leo"}}])",
+            json_scan_params(false, true, R"(["$.payload.id","$.payload.user"])"), slots, {0, 1});
+    // Each array element is expected as one row whose columns come from the JSON paths.
+    ASSERT_TRUE(result.status.ok()) << result.status.to_string();
+    ASSERT_EQ(result.rows, 2);
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 0), 12);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 0), "kate");
+    EXPECT_EQ(nullable_int_at(*result.block.get_by_position(0).column, 1), 13);
+    EXPECT_EQ(nullable_string_at(*result.block.get_by_position(1).column, 1), "leo");
+}
+
+TEST(JsonReaderTest, FillsMissingNullableColumnWithNull) {
+    ObjectPool objects;
+    auto descs = build_slots(&objects);
+    auto scan = read_once("missing_nullable.jsonl",
+                          R"({"id":9})"
+                          "\n",
+                          json_scan_params(), descs, {0, 1});
+    // The document only carries "id", so the second requested column must be NULL.
+    ASSERT_TRUE(scan.status.ok()) << scan.status.to_string();
+    ASSERT_EQ(scan.rows, 1);
+    EXPECT_TRUE(nullable_is_null_at(*scan.block.get_by_position(1).column, 0));
+    EXPECT_EQ(nullable_int_at(*scan.block.get_by_position(0).column, 0), 9);
+}
+
+TEST(JsonReaderTest, ReturnsErrorForMissingRequiredColumn) {
+    ObjectPool objects;
+    auto descs = build_slots_with_required_name(&objects);
+    auto scan = read_once("missing_required.jsonl",
+                          R"({"id":10})"
+                          "\n",
+                          json_scan_params(), descs, {0, 1});
+    // Only "id" is present in the document; the read is expected to fail.
+    EXPECT_FALSE(scan.status.ok());
+}
+
+TEST(JsonReaderTest, ReadsPresentRequiredColumn) {
+    ObjectPool objects;
+    auto descs = build_slots_with_required_name(&objects);
+    auto scan = read_once("present_required.jsonl",
+                          R"({"id":14,"name":"mallory"})"
+                          "\n",
+                          json_scan_params(), descs, {0, 1});
+    // schema[1] must stay non-nullable, hence the plain string_at() accessor below.
+    ASSERT_TRUE(scan.status.ok()) << scan.status.to_string();
+    ASSERT_EQ(scan.schema.size(), 2);
+    EXPECT_FALSE(scan.schema[1].type->is_nullable());
+    EXPECT_TRUE(scan.schema[0].type->is_nullable());
+    ASSERT_EQ(scan.rows, 1);
+    EXPECT_EQ(string_at(*scan.block.get_by_position(1).column, 0), "mallory");
+    EXPECT_EQ(nullable_int_at(*scan.block.get_by_position(0).column, 0), 14);
+}
+
+TEST(JsonReaderTest, SynthesizesComplexFileSchemaFromSlotTypes) {
+    ObjectPool pool;
+    auto slots = build_complex_slots(&pool);
+    const auto file_path = write_json_file("complex_schema.jsonl", "{}\n");
+    auto params = json_scan_params();
+    auto range = file_range(file_path);
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto desc = file_description(file_path.string());
+    RuntimeProfile profile("json_v2_reader_complex_schema_test");
+    MockRuntimeState state;
+    JsonReader reader(system_properties, desc, nullptr, &profile, &params, range, slots);
+    // The schema is requested right after init(); open() is never called in this test.
+    ASSERT_TRUE(reader.init(&state).ok());
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader.get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 4);
+    // Column 0: a string column named "nickname".
+    EXPECT_EQ(schema[0].name, "nickname");
+    EXPECT_EQ(remove_nullable(schema[0].type)->get_primitive_type(), TYPE_STRING);
+    // Column 1: one INT child named "element" with local id 0 (presumably the ARRAY slot).
+    ASSERT_EQ(schema[1].children.size(), 1);
+    EXPECT_EQ(schema[1].children[0].name, "element");
+    EXPECT_EQ(schema[1].children[0].local_id, 0);
+    EXPECT_EQ(remove_nullable(schema[1].children[0].type)->get_primitive_type(), TYPE_INT);
+    // Column 2: STRING children "key" and "value" (presumably the MAP slot).
+    ASSERT_EQ(schema[2].children.size(), 2);
+    EXPECT_EQ(schema[2].children[0].name, "key");
+    EXPECT_EQ(schema[2].children[1].name, "value");
+    EXPECT_EQ(remove_nullable(schema[2].children[0].type)->get_primitive_type(), TYPE_STRING);
+    EXPECT_EQ(remove_nullable(schema[2].children[1].type)->get_primitive_type(), TYPE_STRING);
+    // Column 3: children "name" (STRING) and "scores", which nests an INT "element" child.
+    ASSERT_EQ(schema[3].children.size(), 2);
+    EXPECT_EQ(schema[3].children[0].name, "name");
+    EXPECT_EQ(schema[3].children[1].name, "scores");
+    EXPECT_EQ(remove_nullable(schema[3].children[0].type)->get_primitive_type(), TYPE_STRING);
+    ASSERT_EQ(schema[3].children[1].children.size(), 1);
+    EXPECT_EQ(schema[3].children[1].children[0].name, "element");
+    EXPECT_EQ(remove_nullable(schema[3].children[1].children[0].type)->get_primitive_type(),
+              TYPE_INT);
+}
+
+TEST(JsonReaderTest, RejectsInvalidFileScanRequestsBeforeOpeningFile) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    const auto file_path = write_json_file("invalid_request.jsonl", "{}\n");
+    auto params = json_scan_params();
+    auto range = file_range(file_path);
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto desc = file_description(file_path.string());
+    RuntimeProfile profile("json_v2_reader_invalid_request_test");
+    MockRuntimeState state;
+    JsonReader reader(system_properties, desc, nullptr, &profile, &params, range, slots);
+    ASSERT_TRUE(reader.init(&state).ok());
+    // Case 1: a request that names local column id 9 must be rejected by open().
+    auto unknown_column_request = std::make_shared<FileScanRequest>();
+    unknown_column_request->local_positions.emplace(LocalColumnId(9), LocalIndex(0));
+    auto status = reader.open(unknown_column_request);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("unknown local column id 9"), std::string::npos);
+    // Case 2: a single requested column placed at block position 2 must be rejected.
+    auto invalid_position_request = std::make_shared<FileScanRequest>();
+    invalid_position_request->local_positions.emplace(LocalColumnId(0), LocalIndex(2));
+    status = reader.open(invalid_position_request);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("invalid block position 2"), std::string::npos);
+    // Case 3: two columns both mapped to position 1 leave block position 0 unassigned.
+    auto missing_position_request = std::make_shared<FileScanRequest>();
+    missing_position_request->local_positions.emplace(LocalColumnId(0), LocalIndex(1));
+    missing_position_request->local_positions.emplace(LocalColumnId(1), LocalIndex(1));
+    status = reader.open(missing_position_request);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("misses block position 0"), std::string::npos);
+    // Since every open() above failed, get_block() must report that the reader is not open.
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader.get_schema(&schema).ok());
+    auto block = make_block(schema, {0});
+    size_t rows = 0;
+    bool eof = false;
+    status = reader.get_block(&block, &rows, &eof);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("reader is not open"), std::string::npos);
+}
+
+TEST(JsonReaderTest, ReturnsErrorForMalformedJsonByDefault) {
+    ObjectPool objects;
+    auto descs = build_slots(&objects);
+    auto scan = read_once("malformed_strict.jsonl",
+                          "not-json\n"
+                          R"({"id":11,"name":"judy"})"
+                          "\n",
+                          json_scan_params(), descs, {0, 1});
+    // Default scan params are used, so the unparsable first line is expected to fail the read.
+    EXPECT_FALSE(scan.status.ok());
+}
+
+TEST(JsonReaderTest, IgnoresMalformedJsonAsNullRowsWhenConfigured) {
+    ObjectPool objects;
+    auto descs = build_slots(&objects);
+    auto scan = read_once("ignore_malformed.jsonl",
+                          "not-json\n"
+                          R"({"id":11,"name":"judy"})"
+                          "\n",
+                          json_scan_params(true, false, "", "", true), descs, {0, 1});
+    // Row 0 stems from the unparsable line (both columns NULL); row 1 is the valid document.
+    ASSERT_TRUE(scan.status.ok()) << scan.status.to_string();
+    ASSERT_EQ(scan.rows, 2);
+    EXPECT_TRUE(nullable_is_null_at(*scan.block.get_by_position(1).column, 0));
+    EXPECT_TRUE(nullable_is_null_at(*scan.block.get_by_position(0).column, 0));
+    EXPECT_EQ(nullable_string_at(*scan.block.get_by_position(1).column, 1), "judy");
+    EXPECT_EQ(nullable_int_at(*scan.block.get_by_position(0).column, 1), 11);
+}
+
+TEST(JsonReaderTest, SkipsEmptyJsonLine) {
+    ObjectPool objects;
+    auto descs = build_slots(&objects);
+    auto scan = read_once("empty_line.jsonl",
+                          "\n"
+                          R"({"id":15,"name":"nancy"})"
+                          "\n",
+                          json_scan_params(), descs, {0, 1});
+    // The leading empty line must not yield a row; only the document after it is returned.
+    ASSERT_TRUE(scan.status.ok()) << scan.status.to_string();
+    ASSERT_EQ(scan.rows, 1);
+    EXPECT_EQ(nullable_string_at(*scan.block.get_by_position(1).column, 0), "nancy");
+    EXPECT_EQ(nullable_int_at(*scan.block.get_by_position(0).column, 0), 15);
+}
+
+// Scenario: JSON, Native, CSV, and Hive text all share the same file-local filter order:
+// delete conjuncts run first, ordinary conjuncts run second, and only ordinary conjuncts contribute
+// to IOContext::predicate_filtered_rows. This guards the JSON caller of the shared helper because
+// CSV/Text already assert the optional profile-counter path.
+TEST(JsonReaderTest, AppliesDeleteAndNormalConjunctsWithPredicateFilterAccounting) {
+    ObjectPool pool;
+    auto slots = build_slots(&pool);
+    const auto file_path = write_json_file("filters.jsonl", R"({"id":1,"name":"alice"})"
+                                                            "\n"
+                                                            R"({"id":2,"name":"bob"})"
+                                                            "\n"
+                                                            R"({"id":3,"name":"carol"})"
+                                                            "\n");
+    auto params = json_scan_params();
+    auto range = file_range(file_path);
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto desc = file_description(file_path.string());
+    RuntimeProfile profile("json_v2_reader_filter_test");
+    MockRuntimeState state;
+    auto io_ctx = std::make_shared<io::IOContext>();
+    JsonReader reader(system_properties, desc, io_ctx, &profile, &params, range, slots);
+    // io_ctx is shared with the reader so predicate_filtered_rows can be inspected afterwards.
+    ASSERT_TRUE(reader.init(&state).ok());
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader.get_schema(&schema).ok());
+    // Both filters evaluate "id > N" on block position 0: N = 1 (delete), N = 2 (ordinary).
+    auto request = std::make_shared<FileScanRequest>();
+    request->local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+    request->local_positions.emplace(LocalColumnId(1), LocalIndex(1));
+    request->delete_conjuncts = {
+            prepared_conjunct(&state, std::make_shared<NullableIntGreaterThanExpr>(0, 1))};
+    request->conjuncts = {
+            prepared_conjunct(&state, std::make_shared<NullableIntGreaterThanExpr>(0, 2))};
+    ASSERT_TRUE(reader.open(request).ok());
+    // Expected: id 1 fails the delete filter (uncounted), id 2 the ordinary one (counted once).
+    auto block = make_block(schema, {0, 1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader.get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 3);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "carol");
+    EXPECT_EQ(io_ctx->predicate_filtered_rows, 1);
+}
+
+} // namespace doris::format::json
diff --git a/be/test/format_v2/native/native_reader_test.cpp b/be/test/format_v2/native/native_reader_test.cpp
new file mode 100644
index 00000000000000..aaa7aa90e0681e
--- /dev/null
+++ b/be/test/format_v2/native/native_reader_test.cpp
@@ -0,0 +1,419 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/native/native_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "agent/be_exec_version_manager.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "format/native/native_format.h"
+#include "format_v2/column_mapper.h"
+#include "io/fs/local_file_system.h"
+#include "io/io_common.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+#include "util/coding.h"
+#include "util/uid_util.h"
+
+namespace doris::format::native {
+namespace {
+
+std::unique_ptr<io::FileDescription> file_description(const std::string& path) {
+    auto description = std::make_unique<io::FileDescription>();
+    description->range_start_offset = 0; // the scan range covers the entire file
+    description->file_size = static_cast<int64_t>(std::filesystem::file_size(path));
+    description->range_size = description->file_size;
+    description->path = path;
+    return description;
+}
+
+Status write_file(const std::string& path, std::string_view content) {
+    io::FileWriterPtr out; // filled in by create_file()
+    RETURN_IF_ERROR(io::global_local_filesystem()->create_file(path, &out));
+    if (content.size() > 0) { // empty content: the file is created and closed without a write
+        RETURN_IF_ERROR(out->append({content.data(), content.size()}));
+    }
+    return out->close();
+}
+
+std::unique_ptr<NativeReader> create_reader(const std::string& path, RuntimeState* state,
+                                            RuntimeProfile* profile, // `state` is unused here
+                                            std::shared_ptr<io::IOContext> io_ctx = nullptr) {
+    auto description = file_description(path);
+    auto fs_properties = std::make_shared<io::FileSystemProperties>();
+    fs_properties->system_type = TFileType::FILE_LOCAL; // test files live on local disk
+    return std::make_unique<NativeReader>(fs_properties, description, std::move(io_ctx), profile);
+}
+
+Block make_source_block() {
+    auto ids = ColumnInt32::create();
+    ids->insert_value(10);
+    ids->insert_value(20);
+    // Row i of `names` pairs with row i of `ids`: (10, "alice"), (20, "bob").
+    auto names = ColumnString::create();
+    names->insert_data("alice", 5);
+    names->insert_data("bob", 3);
+    // Both columns are inserted with non-nullable types, "id" first and "name" second.
+    Block source;
+    source.insert({ids->get_ptr(), std::make_shared<DataTypeInt32>(), "id"});
+    source.insert({names->get_ptr(), std::make_shared<DataTypeString>(), "name"});
+    return source;
+}
+
+Status write_native_file(const std::string& path, const Block& block) {
+    io::FileWriterPtr out;
+    RETURN_IF_ERROR(io::global_local_filesystem()->create_file(path, &out));
+    RETURN_IF_ERROR(out->append({DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC)}));
+    // After the magic bytes comes the format version as a fixed-width little-endian uint32.
+    uint8_t version_le[sizeof(uint32_t)];
+    encode_fixed32_le(version_le, DORIS_NATIVE_FORMAT_VERSION);
+    RETURN_IF_ERROR(out->append({version_le, sizeof(version_le)}));
+    // Serialize the block into a PBlock; the size/time out-params are not inspected here.
+    PBlock serialized;
+    size_t raw_bytes = 0;
+    size_t packed_bytes = 0;
+    int64_t pack_time = 0;
+    RETURN_IF_ERROR(block.serialize(BeExecVersionManager::get_newest_version(), &serialized,
+                                    &raw_bytes, &packed_bytes, &pack_time,
+                                    segment_v2::CompressionTypePB::SNAPPY));
+    // Block record: little-endian uint64 payload length followed by the PBlock bytes.
+    const std::string body = serialized.SerializeAsString();
+    uint8_t length_le[sizeof(uint64_t)];
+    encode_fixed64_le(length_le, body.size());
+    RETURN_IF_ERROR(out->append({length_le, sizeof(length_le)}));
+    RETURN_IF_ERROR(out->append(body));
+    return out->close();
+}
+
+Block make_request_block(const std::vector<ColumnDefinition>& schema,
+                         const std::vector<int32_t>& local_ids) {
+    Block request_block; // columns are appended in the order given by local_ids
+    for (const auto wanted_id : local_ids) {
+        const auto match = std::find_if(schema.begin(), schema.end(), [&](const auto& def) {
+            return def.local_id == wanted_id;
+        });
+        DORIS_CHECK(match != schema.end());
+        request_block.insert({match->type->create_column(), match->type, match->name});
+    }
+    return request_block;
+}
+
+int32_t nullable_int_at(const IColumn& column, size_t row) {
+    const auto& wrapper = assert_cast<const ColumnNullable&>(column);
+    const auto& values = assert_cast<const ColumnInt32&>(wrapper.get_nested_column());
+    return values.get_data()[row]; // reads the nested value; the null map is not consulted
+}
+
+std::string nullable_string_at(const IColumn& column, size_t row) {
+    const auto& wrapper = assert_cast<const ColumnNullable&>(column);
+    const auto& values = assert_cast<const ColumnString&>(wrapper.get_nested_column());
+    return values.get_data_at(row).to_string(); // nested value; the null map is not consulted
+}
+
+class NullableIntGreaterThanExpr final : public VExpr { // test-only "int column > value" filter
+public:
+    NullableIntGreaterThanExpr(size_t block_position, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _value(value) {}
+    // Name reported for this expression node.
+    const std::string& expr_name() const override { return _name; }
+    // Always reports non-constant.
+    bool is_constant() const override { return false; }
+    // Emits 1 for each row whose value is non-NULL and greater than _value, otherwise 0.
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& input =
+                assert_cast<const ColumnNullable&>(*block->get_by_position(_block_position).column);
+        const auto& values = assert_cast<const ColumnInt32&>(input.get_nested_column());
+        // One UInt8 flag per output row; `selector`, when set, maps it to the source row.
+        auto matches = ColumnUInt8::create();
+        auto& flags = matches->get_data();
+        flags.resize(count);
+        for (size_t out_row = 0; out_row < count; ++out_row) {
+            const auto in_row = selector == nullptr ? out_row : (*selector)[out_row];
+            const bool hit = !input.is_null_at(in_row) && values.get_element(in_row) > _value;
+            flags[out_row] = hit;
+        }
+        result_column = std::move(matches);
+        return Status::OK();
+    }
+    // A clone carries the same block position and threshold.
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<NullableIntGreaterThanExpr>(_block_position, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    int32_t _value;
+    const std::string _name = "NullableIntGreaterThanExpr";
+};
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto ctx = VExprContext::create_shared(expr);
+    auto st = ctx->prepare(state, RowDescriptor()); // prepared against an empty row descriptor
+    EXPECT_TRUE(st.ok()) << st;
+    st = ctx->open(state);
+    EXPECT_TRUE(st.ok()) << st;
+    return ctx; // returned even if prepare/open failed; failures surface via EXPECT only
+}
+
+} // namespace
+
+TEST(NativeV2ReaderTest, SchemaProbeReplaysFirstBlockAndProjectsColumns) {
+    const auto path = "./log/native_v2_reader_" + UniqueId::gen_uid().to_string() + ".native";
+    std::filesystem::create_directories("./log");
+    ASSERT_TRUE(write_native_file(path, make_source_block()).ok());
+    // The file now holds a single block with two rows; see make_source_block().
+    RuntimeState state;
+    RuntimeProfile profile("native_v2_reader_test");
+    auto reader = create_reader(path, &state, &profile);
+    ASSERT_TRUE(reader->init(&state).ok());
+    // Schema probe: names and local ids follow the source block order; both types are nullable.
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 2);
+    EXPECT_EQ(schema[0].name, "id");
+    EXPECT_EQ(schema[0].local_id, 0);
+    EXPECT_EQ(schema[1].name, "name");
+    EXPECT_EQ(schema[1].local_id, 1);
+    EXPECT_TRUE(schema[0].type->is_nullable());
+    EXPECT_TRUE(schema[1].type->is_nullable());
+    // Request the columns in reverse order: local id 1 first, then local id 0.
+    auto request = std::make_shared<FileScanRequest>();
+    FileScanRequestBuilder builder(request.get());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(0)).ok());
+    ASSERT_TRUE(reader->open(request).ok());
+    // Per the test name, the block consumed by the schema probe is expected to be replayed here.
+    auto block = make_request_block(schema, {1, 0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_FALSE(eof);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice");
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "bob");
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 0), 10);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 1), 20);
+    // A second read into the cleared block must yield zero rows and signal EOF.
+    block.clear_column_data(2);
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_EQ(rows, 0);
+    EXPECT_TRUE(eof);
+    ASSERT_TRUE(reader->close().ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(path));
+}
+
+TEST(NativeV2ReaderTest, AppliesConjunctsAndTracksPredicateFilteredRows) {
+    const auto path =
+            "./log/native_v2_reader_filter_" + UniqueId::gen_uid().to_string() + ".native";
+    std::filesystem::create_directories("./log");
+    ASSERT_TRUE(write_native_file(path, make_source_block()).ok());
+    // io_ctx is handed to the reader so predicate_filtered_rows can be checked afterwards.
+    RuntimeState state;
+    RuntimeProfile profile("native_v2_reader_filter_test");
+    auto io_ctx = std::make_shared<io::IOContext>();
+    auto reader = create_reader(path, &state, &profile, io_ctx);
+    ASSERT_TRUE(reader->init(&state).ok());
+    // get_schema() supplies the column definitions used to build the request block below.
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    // Filter "id > 10" on block position 0: of the source ids (10, 20) only 20 passes.
+    auto request = std::make_shared<FileScanRequest>();
+    FileScanRequestBuilder builder(request.get());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(0)).ok());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok());
+    request->conjuncts = {
+            prepared_conjunct(&state, std::make_shared<NullableIntGreaterThanExpr>(0, 10))};
+    ASSERT_TRUE(reader->open(request).ok());
+    // Expect one surviving row (20, "bob") and exactly one row counted as predicate-filtered.
+    auto block = make_request_block(schema, {0, 1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 20);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "bob");
+    EXPECT_EQ(io_ctx->predicate_filtered_rows, 1);
+    ASSERT_TRUE(reader->close().ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(path));
+}
+
+TEST(NativeV2ReaderTest, RejectsInvalidHeaderAndEmptyFile) {
+    std::filesystem::create_directories("./log");
+    RuntimeState state;
+    RuntimeProfile profile("native_v2_reader_bad_header_test");
+    // Case 1: a header-sized buffer of zero bytes whose first four bytes are "BAD!".
+    const auto bad_magic_path =
+            "./log/native_v2_bad_magic_" + UniqueId::gen_uid().to_string() + ".native";
+    std::string bad_magic(sizeof(DORIS_NATIVE_MAGIC) + sizeof(uint32_t), '\0');
+    bad_magic.replace(0, 4, "BAD!");
+    ASSERT_TRUE(write_file(bad_magic_path, bad_magic).ok());
+    auto bad_magic_reader = create_reader(bad_magic_path, &state, &profile);
+    EXPECT_FALSE(bad_magic_reader->init(&state).ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(bad_magic_path));
+    // Case 2: a zero-byte file; init() is expected to fail as well.
+    const auto empty_path = "./log/native_v2_empty_" + UniqueId::gen_uid().to_string() + ".native";
+    ASSERT_TRUE(write_file(empty_path, "").ok());
+    auto empty_reader = create_reader(empty_path, &state, &profile);
+    EXPECT_FALSE(empty_reader->init(&state).ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(empty_path));
+}
+
+TEST(NativeV2ReaderTest, RejectsUnsupportedVersionAndHeaderOnlyFile) {
+    std::filesystem::create_directories("./log");
+    RuntimeState state;
+    RuntimeProfile profile("native_v2_reader_header_boundary_test");
+    // Case 1: valid magic followed by version DORIS_NATIVE_FORMAT_VERSION + 1; init() must fail.
+    const auto bad_version_path =
+            "./log/native_v2_bad_version_" + UniqueId::gen_uid().to_string() + ".native";
+    std::string bad_version;
+    bad_version.append(DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC));
+    uint8_t version_buffer[sizeof(uint32_t)];
+    encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION + 1);
+    bad_version.append(reinterpret_cast<const char*>(version_buffer), sizeof(version_buffer));
+    ASSERT_TRUE(write_file(bad_version_path, bad_version).ok());
+    auto bad_version_reader = create_reader(bad_version_path, &state, &profile);
+    EXPECT_FALSE(bad_version_reader->init(&state).ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(bad_version_path));
+    // Case 2: valid magic and version but no block; init() succeeds, get_schema() must fail.
+    const auto header_only_path =
+            "./log/native_v2_header_only_" + UniqueId::gen_uid().to_string() + ".native";
+    std::string header_only;
+    header_only.append(DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC));
+    encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION);
+    header_only.append(reinterpret_cast<const char*>(version_buffer), sizeof(version_buffer));
+    ASSERT_TRUE(write_file(header_only_path, header_only).ok());
+    auto header_only_reader = create_reader(header_only_path, &state, &profile);
+    ASSERT_TRUE(header_only_reader->init(&state).ok());
+    std::vector<ColumnDefinition> schema;
+    EXPECT_FALSE(header_only_reader->get_schema(&schema).ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(header_only_path));
+}
+
+TEST(NativeV2ReaderTest, RejectsTruncatedBlockDuringSchemaProbe) {
+    const auto path = "./log/native_v2_truncated_" + UniqueId::gen_uid().to_string() + ".native";
+    std::filesystem::create_directories("./log");
+    // Valid header, then a declared block length of 8 followed by only one payload byte.
+    std::string content;
+    content.append(DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC));
+    uint8_t version_buffer[sizeof(uint32_t)];
+    encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION);
+    content.append(reinterpret_cast<const char*>(version_buffer), sizeof(version_buffer));
+    uint8_t len_buffer[sizeof(uint64_t)];
+    encode_fixed64_le(len_buffer, 8);
+    content.append(reinterpret_cast<const char*>(len_buffer), sizeof(len_buffer));
+    content.append("x");
+    ASSERT_TRUE(write_file(path, content).ok());
+    // init() accepts the header; the short payload must surface as a get_schema() failure.
+    RuntimeState state;
+    RuntimeProfile profile("native_v2_reader_truncated_test");
+    auto reader = create_reader(path, &state, &profile);
+    ASSERT_TRUE(reader->init(&state).ok());
+    std::vector<ColumnDefinition> schema;
+    EXPECT_FALSE(reader->get_schema(&schema).ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(path));
+}
+
+TEST(NativeV2ReaderTest, RejectsZeroLengthBlockAndInvalidPBlock) {
+    std::filesystem::create_directories("./log");
+    RuntimeState state;
+    RuntimeProfile profile("native_v2_reader_bad_block_test");
+    // Builds the valid file header (magic + current format version) shared by both cases.
+    auto build_header = [] {
+        std::string content;
+        content.append(DORIS_NATIVE_MAGIC, sizeof(DORIS_NATIVE_MAGIC));
+        uint8_t version_buffer[sizeof(uint32_t)];
+        encode_fixed32_le(version_buffer, DORIS_NATIVE_FORMAT_VERSION);
+        content.append(reinterpret_cast<const char*>(version_buffer), sizeof(version_buffer));
+        return content;
+    };
+    // Case 1: the header is followed by a declared block length of 0.
+    const auto zero_len_path =
+            "./log/native_v2_zero_len_" + UniqueId::gen_uid().to_string() + ".native";
+    auto zero_len_content = build_header();
+    uint8_t len_buffer[sizeof(uint64_t)];
+    encode_fixed64_le(len_buffer, 0);
+    zero_len_content.append(reinterpret_cast<const char*>(len_buffer), sizeof(len_buffer));
+    ASSERT_TRUE(write_file(zero_len_path, zero_len_content).ok());
+    auto zero_len_reader = create_reader(zero_len_path, &state, &profile);
+    ASSERT_TRUE(zero_len_reader->init(&state).ok());
+    std::vector<ColumnDefinition> schema;
+    EXPECT_FALSE(zero_len_reader->get_schema(&schema).ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(zero_len_path));
+    // Case 2: block length 1 with payload "x"; get_schema() is expected to fail on it.
+    const auto invalid_pblock_path =
+            "./log/native_v2_invalid_pblock_" + UniqueId::gen_uid().to_string() + ".native";
+    auto invalid_pblock_content = build_header();
+    encode_fixed64_le(len_buffer, 1);
+    invalid_pblock_content.append(reinterpret_cast<const char*>(len_buffer), sizeof(len_buffer));
+    invalid_pblock_content.append("x");
+    ASSERT_TRUE(write_file(invalid_pblock_path, invalid_pblock_content).ok());
+    auto invalid_pblock_reader = create_reader(invalid_pblock_path, &state, &profile);
+    ASSERT_TRUE(invalid_pblock_reader->init(&state).ok());
+    schema.clear();
+    EXPECT_FALSE(invalid_pblock_reader->get_schema(&schema).ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(invalid_pblock_path));
+}
+
+TEST(NativeV2ReaderTest, RejectsUnknownRequestedLocalColumn) {
+    const auto path =
+            "./log/native_v2_unknown_column_" + UniqueId::gen_uid().to_string() + ".native";
+    std::filesystem::create_directories("./log");
+    ASSERT_TRUE(write_native_file(path, make_source_block()).ok());
+    // The file itself is valid; only the request below refers to a column it does not have.
+    RuntimeState state;
+    RuntimeProfile profile("native_v2_reader_unknown_column_test");
+    auto reader = create_reader(path, &state, &profile);
+    ASSERT_TRUE(reader->init(&state).ok());
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    // Local id 42 passes the builder and open(); the error is expected from get_block().
+    auto request = std::make_shared<FileScanRequest>();
+    FileScanRequestBuilder builder(request.get());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(42)).ok());
+    ASSERT_TRUE(reader->open(request).ok());
+    Block block;
+    block.insert({schema[0].type->create_column(), schema[0].type, schema[0].name});
+    size_t rows = 0;
+    bool eof = false;
+    EXPECT_FALSE(reader->get_block(&block, &rows, &eof).ok());
+    static_cast<void>(io::global_local_filesystem()->delete_file(path));
+}
+
+} // namespace doris::format::native
diff --git a/be/test/format_v2/parquet/parquet_column_reader_test.cpp b/be/test/format_v2/parquet/parquet_column_reader_test.cpp
new file mode 100644
index 00000000000000..91382203c5cea9
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_column_reader_test.cpp
@@ -0,0 +1,3620 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <array>
+#include <filesystem>
+#include <functional>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_array.h"
+#include "core/column/column_decimal.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/types.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/parquet/selection_vector.h"
+
+namespace doris::format::parquet {
+namespace {
+
+constexpr int64_t ROW_COUNT = 5; // rows appended by build_all_null_int32_array()
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> finished; // Finish() failure is only reported via EXPECT_TRUE
+    EXPECT_TRUE(builder->Finish(&finished).ok());
+    return finished;
+}
+
+template <typename ColumnType>
+const ColumnType& get_nullable_nested_column(const IColumn& column) {
+    // The parquet reader exposes a file-local schema with Doris external-table semantics, so
+    // nested STRUCT fields, LIST elements, and MAP keys/values arrive wrapped in ColumnNullable
+    // even when the parquet field itself is required; unwrap and downcast in one step.
+    const auto& nested = assert_cast<const ColumnNullable&>(column).get_nested_column();
+    return assert_cast<const ColumnType&>(nested);
+}
+
+ParquetColumnSchema mock_column_schema() {
+    ParquetColumnSchema mock; // minimal INT32 column schema shared by the mock readers below
+    mock.name = "mock";
+    mock.local_id = 0;
+    mock.type = std::make_shared<DataTypeInt32>();
+    return mock;
+}
+
+class BaseUnsupportedReader final : public ParquetColumnReader { // read() is its only override
+public:
+    BaseUnsupportedReader()
+            : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {}
+
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override { return Status::OK(); }
+};
+
+class DefaultSelectReader final : public ParquetColumnReader { // logs every read()/skip() size
+public:
+    DefaultSelectReader() : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {}
+
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override {
+        auto& int_column = assert_cast<ColumnInt32&>(*column);
+        for (int64_t offset = 0; offset < rows; ++offset) {
+            int_column.insert_value(static_cast<int32_t>(_cursor + offset));
+        }
+        _cursor += rows; // later reads continue from the new logical position
+        *rows_read = rows;
+        _read_ranges.push_back(rows);
+        return Status::OK();
+    }
+
+    Status skip(int64_t rows) override {
+        _cursor += rows; // skipped rows still consume logical positions
+        _skip_ranges.push_back(rows);
+        return Status::OK();
+    }
+
+    const std::vector<int64_t>& read_ranges() const { return _read_ranges; }
+    const std::vector<int64_t>& skip_ranges() const { return _skip_ranges; }
+
+private:
+    int64_t _cursor = 0; // value emitted for the next row that read() materializes
+    std::vector<int64_t> _read_ranges;
+    std::vector<int64_t> _skip_ranges;
+};
+
+class NestedSkipReader final : public ParquetColumnReader { // fills nested columns with 0..n-1
+public:
+    NestedSkipReader() : ParquetColumnReader(mock_column_schema(), mock_column_schema().type) {}
+
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override { return Status::OK(); }
+
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override {
+        auto& int_column = assert_cast<ColumnInt32&>(*column);
+        for (int64_t next = 0; next < length_upper_bound; ++next) {
+            int_column.insert_value(static_cast<int32_t>(next));
+        }
+        *values_read = length_upper_bound; // the whole upper bound is always reported as read
+        return Status::OK();
+    }
+};
+
+class ParquetColumnReaderTest : public testing::Test {
+protected:
+    void SetUp() override { // builds the parquet fixture and opens row group 0
+        _test_dir = std::filesystem::temp_directory_path() / "doris_parquet_column_reader_test";
+        std::filesystem::remove_all(_test_dir); // start from an empty directory
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "reader.parquet").string();
+        write_parquet_file();
+        _file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false); // no mmap
+        auto metadata = _file_reader->metadata();
+        ASSERT_EQ(metadata->num_row_groups(), 1); // RowGroup(0) below relies on this
+        _row_group = _file_reader->RowGroup(0);
+        ASSERT_NE(_row_group, nullptr);
+        auto schema_descriptor = _file_reader->metadata()->schema();
+        ASSERT_NE(schema_descriptor, nullptr);
+        auto st = build_parquet_column_schema(*schema_descriptor, &_fields);
+        ASSERT_TRUE(st.ok()) << st;
+        ASSERT_EQ(_fields.size(), _expected_by_field.size());
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); } // removes _test_dir
+
+    template <typename Builder, typename Value>
+    std::shared_ptr<arrow::Array> build_required_array(const std::vector<Value>& values) {
+        Builder typed_builder; // default-constructed, so Builder must not need a type argument
+        for (const Value& item : values) {
+            EXPECT_TRUE(typed_builder.Append(item).ok());
+        }
+        return finish_array(&typed_builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+        arrow::StringBuilder utf8_builder; // every appended entry is non-null
+        for (const std::string& text : values) {
+            EXPECT_TRUE(utf8_builder.Append(text).ok());
+        }
+        return finish_array(&utf8_builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int32_array() {
+        arrow::Int32Builder int_builder; // yields [1, NULL, 3, NULL, 5]
+        EXPECT_TRUE(int_builder.Append(1).ok());
+        EXPECT_TRUE(int_builder.AppendNull().ok());
+        EXPECT_TRUE(int_builder.Append(3).ok());
+        EXPECT_TRUE(int_builder.AppendNull().ok());
+        EXPECT_TRUE(int_builder.Append(5).ok());
+        return finish_array(&int_builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_all_null_int32_array() {
+        arrow::Int32Builder null_builder; // receives ROW_COUNT nulls and no values
+        for (int64_t appended = 0; appended < ROW_COUNT; ++appended) {
+            EXPECT_TRUE(null_builder.AppendNull().ok());
+        }
+        return finish_array(&null_builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_struct_array() { // five non-null {a, b} rows
+        auto struct_type = arrow::struct_({arrow::field("a", arrow::int32(), false),
+                                           arrow::field("b", arrow::utf8(), false)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+        auto* b_builder = assert_cast<arrow::StringBuilder*>(builder.field_builder(1));
+        const std::vector<int32_t> a_values = {101, 102, 103, 104, 105};
+        const std::vector<std::string> b_values = {"sa", "sb", "sc", "sd", "se"};
+        for (size_t row = 0; row < a_values.size(); ++row) {
+            EXPECT_TRUE(builder.Append().ok()); // valid struct slot; children appended next
+            EXPECT_TRUE(a_builder->Append(a_values[row]).ok());
+            EXPECT_TRUE(b_builder->Append(b_values[row]).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_array() {
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+        auto* b_builder = assert_cast<arrow::StringBuilder*>(builder.field_builder(1));
+        // Rows: {201, "nsa"}, NULL, {203, b=NULL}, {204, "nsd"}, NULL.
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(201).ok());
+        EXPECT_TRUE(b_builder->Append("nsa").ok());
+        EXPECT_TRUE(builder.AppendNull().ok()); // Arrow also appends an empty slot to each child
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(203).ok());
+        EXPECT_TRUE(b_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(204).ok());
+        EXPECT_TRUE(b_builder->Append("nsd").ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_with_decimal_array() {
+        auto decimal_type = arrow::decimal128(38, 6);
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("d", decimal_type, true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto d_array_builder = std::make_unique<arrow::Decimal128Builder>(
+                decimal_type, arrow::default_memory_pool());
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(d_array_builder)));
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+        auto* d_builder = assert_cast<arrow::Decimal128Builder*>(builder.field_builder(1));
+        // Rows: {301, 123.456789}, NULL, {303, d=NULL}, {304, -987.654321}, NULL (scale 6).
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(301).ok());
+        EXPECT_TRUE(d_builder->Append(arrow::Decimal128(123456789)).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(303).ok());
+        EXPECT_TRUE(d_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(a_builder->Append(304).ok());
+        EXPECT_TRUE(d_builder->Append(arrow::Decimal128(-987654321)).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_with_list_array() {
+        auto list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("xs", list_type, true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        auto list_builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                                 value_builder, list_type);
+        field_builders.push_back(list_builder);
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+
+        EXPECT_TRUE(builder.Append().ok()); // row 0: {a=301, xs=[1, 2]}
+        EXPECT_TRUE(a_builder->Append(301).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append(2).ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok()); // row 1: NULL struct
+
+        EXPECT_TRUE(builder.Append().ok()); // row 2: {a=303, xs=[]}
+        EXPECT_TRUE(a_builder->Append(303).ok());
+        EXPECT_TRUE(list_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 3: {a=304, xs=NULL}
+        EXPECT_TRUE(a_builder->Append(304).ok());
+        EXPECT_TRUE(list_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 4: {a=305, xs=[NULL, 5]}
+        EXPECT_TRUE(a_builder->Append(305).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(5).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_with_map_array() {
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("kv", map_type, true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_builder = std::make_shared<arrow::MapBuilder>(
+                arrow::default_memory_pool(), key_builder, value_builder, map_type);
+        field_builders.push_back(map_builder);
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+
+        EXPECT_TRUE(builder.Append().ok()); // row 0: {a=401, kv={1: "one", 2: NULL}}
+        EXPECT_TRUE(a_builder->Append(401).ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append("one").ok());
+        EXPECT_TRUE(key_builder->Append(2).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok()); // row 1: NULL struct
+
+        EXPECT_TRUE(builder.Append().ok()); // row 2: {a=403, kv={}}
+        EXPECT_TRUE(a_builder->Append(403).ok());
+        EXPECT_TRUE(map_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 3: {a=404, kv=NULL}
+        EXPECT_TRUE(a_builder->Append(404).ok());
+        EXPECT_TRUE(map_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 4: {a=405, kv={5: "five"}}
+        EXPECT_TRUE(a_builder->Append(405).ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(5).ok());
+        EXPECT_TRUE(value_builder->Append("five").ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_with_nested_struct_list_array() {
+        auto list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto nested_type = arrow::struct_({arrow::field("xs", list_type, true)});
+        auto struct_type = arrow::struct_({arrow::field("nested", nested_type, true)});
+
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        auto list_builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                                 value_builder, list_type);
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> nested_field_builders;
+        nested_field_builders.push_back(list_builder);
+        auto nested_builder = std::make_shared<arrow::StructBuilder>(
+                nested_type, arrow::default_memory_pool(), std::move(nested_field_builders));
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        field_builders.push_back(nested_builder);
+        arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                     std::move(field_builders));
+
+        EXPECT_TRUE(builder.Append().ok()); // row 0: {nested={xs=[7, 8]}}
+        EXPECT_TRUE(nested_builder->Append().ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(7).ok());
+        EXPECT_TRUE(value_builder->Append(8).ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok()); // row 1: NULL outer struct
+
+        EXPECT_TRUE(builder.Append().ok()); // row 2: {nested=NULL}
+        EXPECT_TRUE(nested_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 3: {nested={xs=NULL}}
+        EXPECT_TRUE(nested_builder->Append().ok());
+        EXPECT_TRUE(list_builder->AppendNull().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 4: {nested={xs=[]}}
+        EXPECT_TRUE(nested_builder->Append().ok());
+        EXPECT_TRUE(list_builder->AppendEmptyValue().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_int_list_array() {
+        auto element_builder = std::make_shared<arrow::Int32Builder>();
+        arrow::ListBuilder builder(arrow::default_memory_pool(), element_builder,
+                                   arrow::list(arrow::field("element", arrow::int32(), false)));
+        const std::vector<std::vector<int32_t>> lists = {
+                {1, 2}, {3}, {4, 5, 6}, {7}, {8, 9},
+        };
+        for (const std::vector<int32_t>& list : lists) {
+            EXPECT_TRUE(builder.Append().ok()); // open the list, then push its elements
+            for (const int32_t element : list) {
+                EXPECT_TRUE(element_builder->Append(element).ok());
+            }
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_list_array() {
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder,
+                                   arrow::list(arrow::field("element", arrow::int32(), true)));
+        EXPECT_TRUE(builder.Append().ok()); // row 0: [10, 20]
+        EXPECT_TRUE(value_builder->Append(10).ok());
+        EXPECT_TRUE(value_builder->Append(20).ok());
+        EXPECT_TRUE(builder.AppendNull().ok()); // rows 1-2: NULL, []
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok()); // row 3: [NULL, 30]
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(30).ok());
+        EXPECT_TRUE(builder.Append().ok()); // row 4: [40]
+        EXPECT_TRUE(value_builder->Append(40).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_nullable_int_list_array() {
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        arrow::ListBuilder builder(arrow::default_memory_pool(), value_builder,
+                                   arrow::list(arrow::field("element", arrow::int32(), true)));
+        EXPECT_TRUE(builder.AppendEmptyValue().ok()); // rows 0-1: [], [NULL, 110]
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(110).ok());
+        EXPECT_TRUE(builder.Append().ok()); // row 2: [120]
+        EXPECT_TRUE(value_builder->Append(120).ok());
+        EXPECT_TRUE(builder.Append().ok()); // row 3: [130, NULL]
+        EXPECT_TRUE(value_builder->Append(130).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok()); // row 4: [] (list opened, no elements appended)
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_struct_list_array() {
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+        auto struct_builder = std::make_shared<arrow::StructBuilder>(
+                struct_type, arrow::default_memory_pool(), std::move(field_builders));
+        arrow::ListBuilder builder(arrow::default_memory_pool(), struct_builder,
+                                   arrow::list(arrow::field("element", struct_type, true)));
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(struct_builder->field_builder(0));
+        auto* b_builder = assert_cast<arrow::StringBuilder*>(struct_builder->field_builder(1));
+        // Rows: [{11,"la"}, {12,NULL}], NULL, [], [NULL, {13,"ld"}], [{14,"le"}].
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(11).ok());
+        EXPECT_TRUE(b_builder->Append("la").ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(12).ok());
+        EXPECT_TRUE(b_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->AppendNull().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(13).ok());
+        EXPECT_TRUE(b_builder->Append("ld").ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(struct_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(14).ok());
+        EXPECT_TRUE(b_builder->Append("le").ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_list_list_int_array() {
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        auto inner_list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto inner_list_builder = std::make_shared<arrow::ListBuilder>(
+                arrow::default_memory_pool(), value_builder, inner_list_type);
+        arrow::ListBuilder builder(arrow::default_memory_pool(), inner_list_builder,
+                                   arrow::list(arrow::field("element", inner_list_type, true)));
+
+        EXPECT_TRUE(builder.Append().ok()); // row 0: [[1, 2], [], NULL, [NULL, 3]]
+        EXPECT_TRUE(inner_list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append(2).ok());
+        EXPECT_TRUE(inner_list_builder->AppendEmptyValue().ok());
+        EXPECT_TRUE(inner_list_builder->AppendNull().ok());
+        EXPECT_TRUE(inner_list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(3).ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok()); // rows 1-2: NULL, []
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 3: [[4]]
+        EXPECT_TRUE(inner_list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(4).ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 4: [[], [5, NULL]]
+        EXPECT_TRUE(inner_list_builder->AppendEmptyValue().ok());
+        EXPECT_TRUE(inner_list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(5).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_int_string_map_array() {
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), false));
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder,
+                                  map_type);
+        const std::vector<std::vector<std::pair<int32_t, std::string>>> entries_by_row = {
+                {{1, "a"}, {2, "b"}}, {{3, "c"}},           {{4, "d"}, {5, "e"}, {6, "f"}},
+                {{7, "g"}},           {{8, "h"}, {9, "i"}},
+        };
+        for (const auto& entries : entries_by_row) {
+            EXPECT_TRUE(builder.Append().ok()); // one non-null map per row
+            for (const auto& [map_key, map_value] : entries) {
+                EXPECT_TRUE(key_builder->Append(map_key).ok());
+                EXPECT_TRUE(value_builder->Append(map_value).ok());
+            }
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_string_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder,
+                                  map_type);
+        EXPECT_TRUE(builder.Append().ok()); // row 0: {10: "aa", 20: NULL}
+        EXPECT_TRUE(key_builder->Append(10).ok());
+        EXPECT_TRUE(value_builder->Append("aa").ok());
+        EXPECT_TRUE(key_builder->Append(20).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.AppendNull().ok()); // rows 1-2: NULL, {}
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok()); // row 3: {30: "cc"}
+        EXPECT_TRUE(key_builder->Append(30).ok());
+        EXPECT_TRUE(value_builder->Append("cc").ok());
+        EXPECT_TRUE(builder.Append().ok()); // row 4: {40: NULL}
+        EXPECT_TRUE(key_builder->Append(40).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_required_nullable_string_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder,
+                                  map_type);
+        EXPECT_TRUE(builder.AppendEmptyValue().ok()); // rows 0-1: {}, {101: NULL, 102: "bb"}
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(101).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(102).ok());
+        EXPECT_TRUE(value_builder->Append("bb").ok());
+        EXPECT_TRUE(builder.Append().ok()); // row 2: {103: "cc"}
+        EXPECT_TRUE(key_builder->Append(103).ok());
+        EXPECT_TRUE(value_builder->Append("cc").ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok()); // rows 3-4: {}, {104: NULL}
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(104).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_struct_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto struct_type = arrow::struct_(
+                {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), true)});
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+        auto a_array_builder = std::make_unique<arrow::Int32Builder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(a_array_builder)));
+        auto b_array_builder = std::make_unique<arrow::StringBuilder>();
+        field_builders.push_back(std::shared_ptr<arrow::ArrayBuilder>(std::move(b_array_builder)));
+        auto value_builder = std::make_shared<arrow::StructBuilder>(
+                struct_type, arrow::default_memory_pool(), std::move(field_builders));
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", struct_type, true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, value_builder,
+                                  map_type);
+        auto* a_builder = assert_cast<arrow::Int32Builder*>(value_builder->field_builder(0));
+        auto* b_builder = assert_cast<arrow::StringBuilder*>(value_builder->field_builder(1));
+        // Rows: {101:{21,"ma"}, 102:{22,NULL}}, NULL, {}, {103:NULL}, {104:{24,"me"}}.
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(101).ok());
+        EXPECT_TRUE(value_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(21).ok());
+        EXPECT_TRUE(b_builder->Append("ma").ok());
+        EXPECT_TRUE(key_builder->Append(102).ok());
+        EXPECT_TRUE(value_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(22).ok());
+        EXPECT_TRUE(b_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(103).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(key_builder->Append(104).ok());
+        EXPECT_TRUE(value_builder->Append().ok());
+        EXPECT_TRUE(a_builder->Append(24).ok());
+        EXPECT_TRUE(b_builder->Append("me").ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int_list_map_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::Int32Builder>();
+        auto list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto list_builder = std::make_shared<arrow::ListBuilder>(arrow::default_memory_pool(),
+                                                                 value_builder, list_type);
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true));
+        arrow::MapBuilder builder(arrow::default_memory_pool(), key_builder, list_builder,
+                                  map_type);
+
+        EXPECT_TRUE(builder.Append().ok()); // row 0: {201: [1, 2], 202: []}
+        EXPECT_TRUE(key_builder->Append(201).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append(2).ok());
+        EXPECT_TRUE(key_builder->Append(202).ok());
+        EXPECT_TRUE(list_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok()); // rows 1-2: NULL, {}
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 3: {203: NULL, 204: [NULL, 3]}
+        EXPECT_TRUE(key_builder->Append(203).ok());
+        EXPECT_TRUE(list_builder->AppendNull().ok());
+        EXPECT_TRUE(key_builder->Append(204).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(value_builder->Append(3).ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 4: {205: [4]}
+        EXPECT_TRUE(key_builder->Append(205).ok());
+        EXPECT_TRUE(list_builder->Append().ok());
+        EXPECT_TRUE(value_builder->Append(4).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_map_list_array() {
+        auto key_builder = std::make_shared<arrow::Int32Builder>();
+        auto value_builder = std::make_shared<arrow::StringBuilder>();
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto map_builder = std::make_shared<arrow::MapBuilder>(
+                arrow::default_memory_pool(), key_builder, value_builder, map_type);
+        arrow::ListBuilder builder(arrow::default_memory_pool(), map_builder,
+                                   arrow::list(arrow::field("element", map_type, true)));
+
+        EXPECT_TRUE(builder.Append().ok()); // row 0: [{1: "a", 2: NULL}, {}]
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(1).ok());
+        EXPECT_TRUE(value_builder->Append("a").ok());
+        EXPECT_TRUE(key_builder->Append(2).ok());
+        EXPECT_TRUE(value_builder->AppendNull().ok());
+        EXPECT_TRUE(map_builder->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.AppendNull().ok()); // rows 1-2: NULL, []
+        EXPECT_TRUE(builder.AppendEmptyValue().ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 3: [NULL, {3: "c"}]
+        EXPECT_TRUE(map_builder->AppendNull().ok());
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(3).ok());
+        EXPECT_TRUE(value_builder->Append("c").ok());
+
+        EXPECT_TRUE(builder.Append().ok()); // row 4: [{4: "d"}]
+        EXPECT_TRUE(map_builder->Append().ok());
+        EXPECT_TRUE(key_builder->Append(4).ok());
+        EXPECT_TRUE(value_builder->Append("d").ok());
+        return finish_array(&builder);
+    }
+
+    // Five-row map<int32, map<int32, utf8>> fixture. Rows cover NULL and empty outer maps,
+    // NULL and empty inner maps, and a NULL inner string value.
+    std::shared_ptr<arrow::Array> build_nullable_int_map_map_array() {
+        auto* pool = arrow::default_memory_pool();
+        auto inner_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto outer_type = arrow::map(arrow::int32(), arrow::field("value", inner_type, true));
+        auto outer_keys = std::make_shared<arrow::Int32Builder>();
+        auto inner_keys = std::make_shared<arrow::Int32Builder>();
+        auto inner_values = std::make_shared<arrow::StringBuilder>();
+        auto inner_maps = std::make_shared<arrow::MapBuilder>(pool, inner_keys, inner_values,
+                                                              inner_type);
+        arrow::MapBuilder rows(pool, outer_keys, inner_maps, outer_type);
+
+        EXPECT_TRUE(rows.Append().ok()); // row 0: {10: {101: "aa"}, 20: {}}
+        EXPECT_TRUE(outer_keys->Append(10).ok());
+        EXPECT_TRUE(inner_maps->Append().ok());
+        EXPECT_TRUE(inner_keys->Append(101).ok());
+        EXPECT_TRUE(inner_values->Append("aa").ok());
+        EXPECT_TRUE(outer_keys->Append(20).ok());
+        EXPECT_TRUE(inner_maps->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(rows.AppendNull().ok());       // row 1: NULL
+        EXPECT_TRUE(rows.AppendEmptyValue().ok()); // row 2: {}
+
+        EXPECT_TRUE(rows.Append().ok()); // row 3: {30: NULL, 40: {401: NULL}}
+        EXPECT_TRUE(outer_keys->Append(30).ok());
+        EXPECT_TRUE(inner_maps->AppendNull().ok());
+        EXPECT_TRUE(outer_keys->Append(40).ok());
+        EXPECT_TRUE(inner_maps->Append().ok());
+        EXPECT_TRUE(inner_keys->Append(401).ok());
+        EXPECT_TRUE(inner_values->AppendNull().ok());
+
+        EXPECT_TRUE(rows.AppendEmptyValue().ok()); // row 4: {}
+        return finish_array(&rows);
+    }
+
+    // Five-row list<struct<kv: map<int32, list<int32>>>> fixture. Rows cover NULL and empty
+    // values at the list, struct, map and inner-list levels, plus a NULL int element.
+    std::shared_ptr<arrow::Array> build_deep_list_struct_map_list_array() {
+        auto* pool = arrow::default_memory_pool();
+        auto list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto map_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true));
+        auto struct_type = arrow::struct_({arrow::field("kv", map_type, true)});
+        auto list_items = std::make_shared<arrow::Int32Builder>();
+        auto map_values = std::make_shared<arrow::ListBuilder>(pool, list_items, list_type);
+        auto map_keys = std::make_shared<arrow::Int32Builder>();
+        auto maps = std::make_shared<arrow::MapBuilder>(pool, map_keys, map_values, map_type);
+        std::vector<std::shared_ptr<arrow::ArrayBuilder>> struct_children{maps};
+        auto structs = std::make_shared<arrow::StructBuilder>(struct_type, pool,
+                                                              std::move(struct_children));
+        arrow::ListBuilder rows(pool, structs,
+                                arrow::list(arrow::field("element", struct_type, true)));
+
+        EXPECT_TRUE(rows.Append().ok()); // row 0: [{kv: {1: [10, NULL], 2: []}}, NULL]
+        EXPECT_TRUE(structs->Append().ok());
+        EXPECT_TRUE(maps->Append().ok());
+        EXPECT_TRUE(map_keys->Append(1).ok());
+        EXPECT_TRUE(map_values->Append().ok());
+        EXPECT_TRUE(list_items->Append(10).ok());
+        EXPECT_TRUE(list_items->AppendNull().ok());
+        EXPECT_TRUE(map_keys->Append(2).ok());
+        EXPECT_TRUE(map_values->AppendEmptyValue().ok());
+        EXPECT_TRUE(structs->AppendNull().ok());
+
+        EXPECT_TRUE(rows.AppendNull().ok());       // row 1: NULL
+        EXPECT_TRUE(rows.AppendEmptyValue().ok()); // row 2: []
+
+        EXPECT_TRUE(rows.Append().ok()); // row 3: [{kv: NULL}, {kv: {}}]
+        EXPECT_TRUE(structs->Append().ok());
+        EXPECT_TRUE(maps->AppendNull().ok());
+        EXPECT_TRUE(structs->Append().ok());
+        EXPECT_TRUE(maps->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(rows.Append().ok()); // row 4: [{kv: {3: NULL, 4: [40]}}]
+        EXPECT_TRUE(structs->Append().ok());
+        EXPECT_TRUE(maps->Append().ok());
+        EXPECT_TRUE(map_keys->Append(3).ok());
+        EXPECT_TRUE(map_values->AppendNull().ok());
+        EXPECT_TRUE(map_keys->Append(4).ok());
+        EXPECT_TRUE(map_values->Append().ok());
+        EXPECT_TRUE(list_items->Append(40).ok());
+        return finish_array(&rows);
+    }
+
+    // Five-row map<int32, list<map<int32, utf8>>> fixture. The outer map, the list value and
+    // the inner map each take both a NULL and an empty value somewhere in the rows below, and
+    // one inner string value is NULL.
+    std::shared_ptr<arrow::Array> build_deep_map_list_map_array() {
+        auto* pool = arrow::default_memory_pool();
+        auto inner_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto list_type = arrow::list(arrow::field("element", inner_type, true));
+        auto outer_type = arrow::map(arrow::int32(), arrow::field("value", list_type, true));
+        auto inner_keys = std::make_shared<arrow::Int32Builder>();
+        auto inner_values = std::make_shared<arrow::StringBuilder>();
+        auto inner_maps = std::make_shared<arrow::MapBuilder>(pool, inner_keys, inner_values,
+                                                              inner_type);
+        auto lists = std::make_shared<arrow::ListBuilder>(pool, inner_maps, list_type);
+        auto outer_keys = std::make_shared<arrow::Int32Builder>();
+        arrow::MapBuilder rows(pool, outer_keys, lists, outer_type);
+
+        EXPECT_TRUE(rows.Append().ok()); // row 0: {10: [{1: "a", 2: NULL}, {}, NULL], 20: []}
+        EXPECT_TRUE(outer_keys->Append(10).ok());
+        EXPECT_TRUE(lists->Append().ok());
+        EXPECT_TRUE(inner_maps->Append().ok());
+        EXPECT_TRUE(inner_keys->Append(1).ok());
+        EXPECT_TRUE(inner_values->Append("a").ok());
+        EXPECT_TRUE(inner_keys->Append(2).ok());
+        EXPECT_TRUE(inner_values->AppendNull().ok());
+        EXPECT_TRUE(inner_maps->AppendEmptyValue().ok());
+        EXPECT_TRUE(inner_maps->AppendNull().ok());
+        EXPECT_TRUE(outer_keys->Append(20).ok());
+        EXPECT_TRUE(lists->AppendEmptyValue().ok());
+
+        EXPECT_TRUE(rows.AppendNull().ok());       // row 1: NULL
+        EXPECT_TRUE(rows.AppendEmptyValue().ok()); // row 2: {}
+
+        EXPECT_TRUE(rows.Append().ok()); // row 3: {30: NULL, 40: [{3: "c"}]}
+        EXPECT_TRUE(outer_keys->Append(30).ok());
+        EXPECT_TRUE(lists->AppendNull().ok());
+        EXPECT_TRUE(outer_keys->Append(40).ok());
+        EXPECT_TRUE(lists->Append().ok());
+        EXPECT_TRUE(inner_maps->Append().ok());
+        EXPECT_TRUE(inner_keys->Append(3).ok());
+        EXPECT_TRUE(inner_values->Append("c").ok());
+
+        EXPECT_TRUE(rows.Append().ok()); // row 4: {50: [NULL, {4: "d"}]}
+        EXPECT_TRUE(outer_keys->Append(50).ok());
+        EXPECT_TRUE(lists->Append().ok());
+        EXPECT_TRUE(inner_maps->AppendNull().ok());
+        EXPECT_TRUE(inner_maps->Append().ok());
+        EXPECT_TRUE(inner_keys->Append(4).ok());
+        EXPECT_TRUE(inner_values->Append("d").ok());
+        return finish_array(&rows);
+    }
+
+    void add_field(const std::shared_ptr<arrow::Field>& field, std::shared_ptr<arrow::Array> array,
+                   std::function<void(const ParquetColumnSchema&, const IColumn&)> validator) {
+        _arrow_fields.emplace_back(field); // each call adds one entry to all three vectors
+        _arrays.emplace_back(std::move(array));
+        _expected_by_field.emplace_back(std::move(validator));
+    }
+
+    void write_parquet_file() {
+        add_field(arrow::field("int32_col", arrow::int32(), false),
+                  build_required_array<arrow::Int32Builder, int32_t>({10, 20, 30, 40, 50}),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32);
+                      const auto& values = assert_cast<const ColumnInt32&>(column);
+                      EXPECT_EQ(values.get_element(0), 10);
+                      EXPECT_EQ(values.get_element(4), 50);
+                  });
+        add_field(arrow::field("string_col", arrow::utf8(), false),
+                  build_string_array({"alpha", "beta", "gamma", "delta", "epsilon"}),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type_descriptor.is_string_like);
+                      const auto& values = assert_cast<const ColumnString&>(column);
+                      EXPECT_EQ(values.get_data_at(0).to_string(), "alpha");
+                      EXPECT_EQ(values.get_data_at(4).to_string(), "epsilon");
+                  });
+        add_field(arrow::field("nullable_int_col", arrow::int32(), true),
+                  build_nullable_int32_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      const auto& nested_column =
+                              assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_TRUE(nullable_column.is_null_at(3));
+                      EXPECT_EQ(nested_column.get_element(0), 1);
+                      EXPECT_EQ(nested_column.get_element(2), 3);
+                  });
+        add_field(arrow::field("all_null_int_col", arrow::int32(), true),
+                  build_all_null_int32_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      for (size_t row = 0; row < ROW_COUNT; ++row) {
+                          EXPECT_TRUE(nullable_column.is_null_at(row));
+                      }
+                  });
+        add_field(arrow::field("struct_col",
+                               arrow::struct_({
+                                       arrow::field("a", arrow::int32(), false),
+                                       arrow::field("b", arrow::utf8(), false),
+                               }),
+                               false),
+                  build_required_struct_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_STRUCT);
+                      const auto& struct_column = assert_cast<const ColumnStruct&>(column);
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      const auto& b_values =
+                              get_nullable_nested_column<ColumnString>(struct_column.get_column(1));
+                      EXPECT_EQ(a_values.get_element(0), 101);
+                      EXPECT_EQ(a_values.get_element(4), 105);
+                      EXPECT_EQ(b_values.get_data_at(1).to_string(), "sb");
+                      EXPECT_EQ(b_values.get_data_at(4).to_string(), "se");
+                  });
+        add_field(arrow::field("nullable_struct_col",
+                               arrow::struct_({
+                                       arrow::field("a", arrow::int32(), false),
+                                       arrow::field("b", arrow::utf8(), true),
+                               }),
+                               true),
+                  build_nullable_struct_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_TRUE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      const auto& b_values =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      const auto& b_nested =
+                              assert_cast<const ColumnString&>(b_values.get_nested_column());
+                      EXPECT_EQ(a_values.get_element(0), 201);
+                      EXPECT_EQ(a_values.get_element(2), 203);
+                      EXPECT_EQ(a_values.get_element(3), 204);
+                      EXPECT_FALSE(b_values.is_null_at(0));
+                      EXPECT_TRUE(b_values.is_null_at(2));
+                      EXPECT_FALSE(b_values.is_null_at(3));
+                      EXPECT_EQ(b_nested.get_data_at(0).to_string(), "nsa");
+                      EXPECT_EQ(b_nested.get_data_at(3).to_string(), "nsd");
+                  });
+        add_field(arrow::field("nullable_struct_decimal_col",
+                               arrow::struct_({
+                                       arrow::field("a", arrow::int32(), false),
+                                       arrow::field("d", arrow::decimal128(38, 6), true),
+                               }),
+                               true),
+                  build_nullable_struct_with_decimal_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_TRUE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      const auto& d_values =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      const auto& d_nested =
+                              assert_cast<const ColumnDecimal128V3&>(d_values.get_nested_column());
+                      EXPECT_EQ(a_values.get_element(0), 301);
+                      EXPECT_EQ(a_values.get_element(2), 303);
+                      EXPECT_EQ(a_values.get_element(3), 304);
+                      EXPECT_FALSE(d_values.is_null_at(0));
+                      EXPECT_TRUE(d_values.is_null_at(2));
+                      EXPECT_FALSE(d_values.is_null_at(3));
+                      EXPECT_EQ(d_nested.get_element(0), Decimal128V3(123456789));
+                      EXPECT_EQ(d_nested.get_element(3), Decimal128V3(-987654321));
+                  });
+        auto struct_list_type = arrow::struct_({
+                arrow::field("a", arrow::int32(), false),
+                arrow::field("xs", arrow::list(arrow::field("element", arrow::int32(), true)),
+                             true),
+        });
+        add_field(arrow::field("nullable_struct_list_col", struct_list_type, true),
+                  build_nullable_struct_with_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      EXPECT_EQ(a_values.get_element(0), 301);
+                      EXPECT_EQ(a_values.get_element(2), 303);
+                      EXPECT_EQ(a_values.get_element(3), 304);
+                      EXPECT_EQ(a_values.get_element(4), 305);
+
+                      const auto& xs_nullable =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      ASSERT_EQ(xs_nullable.size(), ROW_COUNT);
+                      EXPECT_FALSE(xs_nullable.is_null_at(0));
+                      EXPECT_FALSE(xs_nullable.is_null_at(2));
+                      EXPECT_TRUE(xs_nullable.is_null_at(3));
+                      EXPECT_FALSE(xs_nullable.is_null_at(4));
+                      const auto& xs_array =
+                              assert_cast<const ColumnArray&>(xs_nullable.get_nested_column());
+                      const auto& offsets = xs_array.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 2);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 2);
+                      EXPECT_EQ(offsets[3], 2);
+                      EXPECT_EQ(offsets[4], 4);
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(xs_array.get_data());
+                      ASSERT_EQ(elements.size(), 4);
+                      EXPECT_FALSE(elements.is_null_at(0));
+                      EXPECT_FALSE(elements.is_null_at(1));
+                      EXPECT_TRUE(elements.is_null_at(2));
+                      EXPECT_FALSE(elements.is_null_at(3));
+                      const auto& values =
+                              assert_cast<const ColumnInt32&>(elements.get_nested_column());
+                      EXPECT_EQ(values.get_element(0), 1);
+                      EXPECT_EQ(values.get_element(1), 2);
+                      EXPECT_EQ(values.get_element(3), 5);
+                  });
+        auto struct_map_type = arrow::struct_({
+                arrow::field("a", arrow::int32(), false),
+                arrow::field("kv",
+                             arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)),
+                             true),
+        });
+        add_field(arrow::field("nullable_struct_map_col", struct_map_type, true),
+                  build_nullable_struct_with_map_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      ASSERT_EQ(struct_column.get_columns().size(), 2);
+                      const auto& a_values =
+                              get_nullable_nested_column<ColumnInt32>(struct_column.get_column(0));
+                      EXPECT_EQ(a_values.get_element(0), 401);
+                      EXPECT_EQ(a_values.get_element(2), 403);
+                      EXPECT_EQ(a_values.get_element(3), 404);
+                      EXPECT_EQ(a_values.get_element(4), 405);
+
+                      const auto& kv_nullable =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(1));
+                      ASSERT_EQ(kv_nullable.size(), ROW_COUNT);
+                      EXPECT_FALSE(kv_nullable.is_null_at(0));
+                      EXPECT_FALSE(kv_nullable.is_null_at(2));
+                      EXPECT_TRUE(kv_nullable.is_null_at(3));
+                      EXPECT_FALSE(kv_nullable.is_null_at(4));
+                      const auto& kv_map =
+                              assert_cast<const ColumnMap&>(kv_nullable.get_nested_column());
+                      const auto& offsets = kv_map.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 2);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 2);
+                      EXPECT_EQ(offsets[3], 2);
+                      EXPECT_EQ(offsets[4], 3);
+                      const auto& keys = get_nullable_nested_column<ColumnInt32>(kv_map.get_keys());
+                      const auto& values = assert_cast<const ColumnNullable&>(kv_map.get_values());
+                      const auto& value_data =
+                              assert_cast<const ColumnString&>(values.get_nested_column());
+                      ASSERT_EQ(keys.size(), 3);
+                      ASSERT_EQ(values.size(), 3);
+                      EXPECT_EQ(keys.get_element(0), 1);
+                      EXPECT_EQ(keys.get_element(1), 2);
+                      EXPECT_EQ(keys.get_element(2), 5);
+                      EXPECT_EQ(value_data.get_data_at(0).to_string(), "one");
+                      EXPECT_TRUE(values.is_null_at(1));
+                      EXPECT_EQ(value_data.get_data_at(2).to_string(), "five");
+                  });
+        auto nested_struct_list_type = arrow::struct_({
+                arrow::field("nested",
+                             arrow::struct_({
+                                     arrow::field("xs",
+                                                  arrow::list(arrow::field("element",
+                                                                           arrow::int32(), true)),
+                                                  true),
+                             }),
+                             true),
+        });
+        add_field(arrow::field("nullable_struct_nested_struct_list_col", nested_struct_list_type,
+                               true),
+                  build_nullable_struct_with_nested_struct_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                      const auto& struct_column =
+                              assert_cast<const ColumnStruct&>(nullable_column.get_nested_column());
+                      const auto& nested_nullable =
+                              assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+                      EXPECT_FALSE(nested_nullable.is_null_at(0));
+                      EXPECT_TRUE(nested_nullable.is_null_at(2));
+                      EXPECT_FALSE(nested_nullable.is_null_at(3));
+                      EXPECT_FALSE(nested_nullable.is_null_at(4));
+                  });
+        add_field(arrow::field("list_int_col",
+                               arrow::list(arrow::field("element", arrow::int32(), false)), false),
+                  build_required_int_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_ARRAY);
+                      const auto* array_type =
+                              assert_cast<const DataTypeArray*>(remove_nullable(schema.type).get());
+                      EXPECT_EQ(
+                              remove_nullable(array_type->get_nested_type())->get_primitive_type(),
+                              TYPE_INT);
+                      const auto& array_column = assert_cast<const ColumnArray&>(column);
+                      ASSERT_EQ(array_column.size(), ROW_COUNT);
+                      const auto array_size_at = [&array_column](size_t row_idx) {
+                          return array_column.get_offsets()[row_idx] -
+                                 (row_idx == 0 ? 0 : array_column.get_offsets()[row_idx - 1]);
+                      };
+                      EXPECT_EQ(array_size_at(0), 2);
+                      EXPECT_EQ(array_size_at(1), 1);
+                      EXPECT_EQ(array_size_at(2), 3);
+                      EXPECT_EQ(array_size_at(4), 2);
+                      const auto& values =
+                              get_nullable_nested_column<ColumnInt32>(array_column.get_data());
+                      ASSERT_EQ(values.size(), 9);
+                      EXPECT_EQ(values.get_element(0), 1);
+                      EXPECT_EQ(values.get_element(5), 6);
+                      EXPECT_EQ(values.get_element(8), 9);
+                  });
+        add_field(arrow::field("nullable_list_int_col",
+                               arrow::list(arrow::field("element", arrow::int32(), true)), true),
+                  build_nullable_int_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_column.is_null_at(0));
+                      EXPECT_TRUE(nullable_column.is_null_at(1));
+                      EXPECT_FALSE(nullable_column.is_null_at(2));
+                      EXPECT_FALSE(nullable_column.is_null_at(3));
+                      const auto& array_column =
+                              assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+                      const auto& offsets = array_column.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 2);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 2);
+                      EXPECT_EQ(offsets[3], 4);
+                      EXPECT_EQ(offsets[4], 5);
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(array_column.get_data());
+                      const auto& values =
+                              assert_cast<const ColumnInt32&>(elements.get_nested_column());
+                      ASSERT_EQ(elements.size(), 5);
+                      EXPECT_EQ(values.get_element(0), 10);
+                      EXPECT_EQ(values.get_element(1), 20);
+                      EXPECT_TRUE(elements.is_null_at(2));
+                      EXPECT_EQ(values.get_element(3), 30);
+                      EXPECT_EQ(values.get_element(4), 40);
+                  });
+        add_field(arrow::field("required_nullable_list_int_col",
+                               arrow::list(arrow::field("element", arrow::int32(), true)), false),
+                  build_required_nullable_int_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_FALSE(schema.type->is_nullable());
+                      const auto& array_column = assert_cast<const ColumnArray&>(column);
+                      const auto& offsets = array_column.get_offsets();
+                      ASSERT_EQ(offsets.size(), ROW_COUNT);
+                      EXPECT_EQ(offsets[0], 0);
+                      EXPECT_EQ(offsets[1], 2);
+                      EXPECT_EQ(offsets[2], 3);
+                      EXPECT_EQ(offsets[3], 5);
+                      EXPECT_EQ(offsets[4], 5);
+                      const auto& elements =
+                              assert_cast<const ColumnNullable&>(array_column.get_data());
+                      ASSERT_EQ(elements.size(), 5);
+                      EXPECT_TRUE(elements.is_null_at(0));
+                      EXPECT_FALSE(elements.is_null_at(1));
+                      EXPECT_TRUE(elements.is_null_at(4));
+                  });
+        auto list_struct_type = arrow::struct_({
+                arrow::field("a", arrow::int32(), false),
+                arrow::field("b", arrow::utf8(), true),
+        });
+        add_field(arrow::field("nullable_list_struct_col",
+                               arrow::list(arrow::field("element", list_struct_type, true)), true),
+                  build_nullable_struct_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_result = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_result.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_result.is_null_at(0));
+                      EXPECT_TRUE(nullable_result.is_null_at(1)); // row 1 is a NULL list
+                      EXPECT_FALSE(nullable_result.is_null_at(2));
+                      EXPECT_FALSE(nullable_result.is_null_at(3));
+                      EXPECT_FALSE(nullable_result.is_null_at(4));
+
+                      const auto& column_array =
+                              assert_cast<const ColumnArray&>(nullable_result.get_nested_column());
+                      const auto& end_pos = column_array.get_offsets();
+                      ASSERT_EQ(end_pos.size(), ROW_COUNT);
+                      EXPECT_EQ(end_pos[0], 2);
+                      EXPECT_EQ(end_pos[1], 2);
+                      EXPECT_EQ(end_pos[2], 2);
+                      EXPECT_EQ(end_pos[3], 4);
+                      EXPECT_EQ(end_pos[4], 5);
+
+                      const auto& elem_col =
+                              assert_cast<const ColumnNullable&>(column_array.get_data());
+                      const auto& struct_fields =
+                              assert_cast<const ColumnStruct&>(elem_col.get_nested_column());
+                      const auto& a_column =
+                              get_nullable_nested_column<ColumnInt32>(struct_fields.get_column(0));
+                      const auto& b_column =
+                              assert_cast<const ColumnNullable&>(struct_fields.get_column(1));
+                      const auto& b_text =
+                              assert_cast<const ColumnString&>(b_column.get_nested_column());
+                      ASSERT_EQ(elem_col.size(), 5); // equals the last offset
+                      EXPECT_FALSE(elem_col.is_null_at(0));
+                      EXPECT_FALSE(elem_col.is_null_at(1));
+                      EXPECT_TRUE(elem_col.is_null_at(2)); // NULL struct element
+                      EXPECT_FALSE(elem_col.is_null_at(3));
+                      EXPECT_EQ(a_column.get_element(0), 11);
+                      EXPECT_EQ(a_column.get_element(1), 12);
+                      EXPECT_EQ(a_column.get_element(3), 13);
+                      EXPECT_EQ(a_column.get_element(4), 14);
+                      EXPECT_EQ(b_text.get_data_at(0).to_string(), "la");
+                      EXPECT_TRUE(b_column.is_null_at(1)); // field b is NULL in a non-NULL struct
+                      EXPECT_EQ(b_text.get_data_at(3).to_string(), "ld");
+                      EXPECT_EQ(b_text.get_data_at(4).to_string(), "le");
+                  });
+        auto nested_list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        add_field(arrow::field("nullable_list_list_int_col",
+                               arrow::list(arrow::field("element", nested_list_type, true)), true),
+                  build_nullable_list_list_int_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_result = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_result.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_result.is_null_at(0));
+                      EXPECT_TRUE(nullable_result.is_null_at(1)); // row 1 is a NULL outer list
+                      EXPECT_FALSE(nullable_result.is_null_at(2));
+                      EXPECT_FALSE(nullable_result.is_null_at(3));
+                      EXPECT_FALSE(nullable_result.is_null_at(4));
+
+                      const auto& outer_lists =
+                              assert_cast<const ColumnArray&>(nullable_result.get_nested_column());
+                      const auto& outer_end_pos = outer_lists.get_offsets();
+                      ASSERT_EQ(outer_end_pos.size(), ROW_COUNT);
+                      EXPECT_EQ(outer_end_pos[0], 4);
+                      EXPECT_EQ(outer_end_pos[1], 4);
+                      EXPECT_EQ(outer_end_pos[2], 4);
+                      EXPECT_EQ(outer_end_pos[3], 5);
+                      EXPECT_EQ(outer_end_pos[4], 7);
+
+                      const auto& nullable_inner =
+                              assert_cast<const ColumnNullable&>(outer_lists.get_data());
+                      ASSERT_EQ(nullable_inner.size(), 7); // same as the last outer offset
+                      EXPECT_FALSE(nullable_inner.is_null_at(0));
+                      EXPECT_FALSE(nullable_inner.is_null_at(1));
+                      EXPECT_TRUE(nullable_inner.is_null_at(2)); // NULL inner list
+                      EXPECT_FALSE(nullable_inner.is_null_at(3));
+                      EXPECT_FALSE(nullable_inner.is_null_at(6));
+
+                      const auto& inner_lists =
+                              assert_cast<const ColumnArray&>(nullable_inner.get_nested_column());
+                      const auto& inner_end_pos = inner_lists.get_offsets();
+                      ASSERT_EQ(inner_end_pos.size(), 7);
+                      EXPECT_EQ(inner_end_pos[0], 2);
+                      EXPECT_EQ(inner_end_pos[1], 2);
+                      EXPECT_EQ(inner_end_pos[2], 2);
+                      EXPECT_EQ(inner_end_pos[3], 4);
+                      EXPECT_EQ(inner_end_pos[4], 5);
+                      EXPECT_EQ(inner_end_pos[5], 5);
+                      EXPECT_EQ(inner_end_pos[6], 7);
+
+                      const auto& elem_col =
+                              assert_cast<const ColumnNullable&>(inner_lists.get_data());
+                      const auto& nested =
+                              assert_cast<const ColumnInt32&>(elem_col.get_nested_column());
+                      ASSERT_EQ(elem_col.size(), 7); // same as the last inner offset
+                      EXPECT_EQ(nested.get_element(0), 1);
+                      EXPECT_EQ(nested.get_element(1), 2);
+                      EXPECT_TRUE(elem_col.is_null_at(2));
+                      EXPECT_EQ(nested.get_element(3), 3);
+                      EXPECT_EQ(nested.get_element(4), 4);
+                      EXPECT_EQ(nested.get_element(5), 5);
+                      EXPECT_TRUE(elem_col.is_null_at(6));
+                  });
+        add_field(arrow::field(
+                          "map_int_string_col",
+                          arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), false)),
+                          false),
+                  build_required_int_string_map_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_MAP);
+                      const auto* map_info =
+                              assert_cast<const DataTypeMap*>(remove_nullable(schema.type).get());
+                      EXPECT_EQ(remove_nullable(map_info->get_key_type())->get_primitive_type(),
+                                TYPE_INT);
+                      EXPECT_EQ(remove_nullable(map_info->get_value_type())->get_primitive_type(),
+                                TYPE_STRING);
+                      const auto& column_map = assert_cast<const ColumnMap&>(column);
+                      ASSERT_EQ(column_map.size(), ROW_COUNT);
+                      const auto entry_count = [&column_map](size_t row_pos) {
+                          return column_map.get_offsets()[row_pos] -
+                                 (row_pos == 0 ? 0 : column_map.get_offsets()[row_pos - 1]);
+                      };
+                      EXPECT_EQ(entry_count(0), 2);
+                      EXPECT_EQ(entry_count(1), 1);
+                      EXPECT_EQ(entry_count(2), 3);
+                      EXPECT_EQ(entry_count(4), 2); // row 3 is not checked here
+                      const auto& keys =
+                              get_nullable_nested_column<ColumnInt32>(column_map.get_keys());
+                      const auto& mapped =
+                              get_nullable_nested_column<ColumnString>(column_map.get_values());
+                      ASSERT_EQ(keys.size(), 9); // total entries across all rows
+                      ASSERT_EQ(mapped.size(), 9);
+                      EXPECT_EQ(keys.get_element(0), 1);
+                      EXPECT_EQ(keys.get_element(5), 6);
+                      EXPECT_EQ(keys.get_element(8), 9);
+                      EXPECT_EQ(mapped.get_data_at(0).to_string(), "a");
+                      EXPECT_EQ(mapped.get_data_at(5).to_string(), "f");
+                      EXPECT_EQ(mapped.get_data_at(8).to_string(), "i");
+                  });
+        add_field(
+                arrow::field("nullable_map_int_string_col",
+                             arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)),
+                             true),
+                build_nullable_int_string_map_array(),
+                [](const ParquetColumnSchema& schema, const IColumn& column) {
+                    EXPECT_TRUE(schema.type->is_nullable());
+                    const auto& nullable_result = assert_cast<const ColumnNullable&>(column);
+                    ASSERT_EQ(nullable_result.size(), ROW_COUNT);
+                    EXPECT_FALSE(nullable_result.is_null_at(0));
+                    EXPECT_TRUE(nullable_result.is_null_at(1)); // row 1 is a NULL map
+                    EXPECT_FALSE(nullable_result.is_null_at(2));
+                    const auto& column_map =
+                            assert_cast<const ColumnMap&>(nullable_result.get_nested_column());
+                    const auto& end_pos = column_map.get_offsets();
+                    ASSERT_EQ(end_pos.size(), ROW_COUNT);
+                    EXPECT_EQ(end_pos[0], 2);
+                    EXPECT_EQ(end_pos[1], 2);
+                    EXPECT_EQ(end_pos[2], 2);
+                    EXPECT_EQ(end_pos[3], 3);
+                    EXPECT_EQ(end_pos[4], 4);
+                    const auto& keys =
+                            get_nullable_nested_column<ColumnInt32>(column_map.get_keys());
+                    const auto& mapped =
+                            assert_cast<const ColumnNullable&>(column_map.get_values());
+                    const auto& value_text =
+                            assert_cast<const ColumnString&>(mapped.get_nested_column());
+                    ASSERT_EQ(keys.size(), 4); // equals the last offset
+                    EXPECT_EQ(keys.get_element(0), 10);
+                    EXPECT_EQ(keys.get_element(1), 20);
+                    EXPECT_EQ(keys.get_element(3), 40);
+                    EXPECT_EQ(value_text.get_data_at(0).to_string(), "aa");
+                    EXPECT_TRUE(mapped.is_null_at(1)); // NULL map value
+                    EXPECT_EQ(value_text.get_data_at(2).to_string(), "cc");
+                    EXPECT_TRUE(mapped.is_null_at(3));
+                });
+        add_field(
+                arrow::field("required_nullable_map_int_string_col",
+                             arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true)),
+                             false),
+                build_required_nullable_string_map_array(),
+                [](const ParquetColumnSchema& schema, const IColumn& column) {
+                    EXPECT_FALSE(schema.type->is_nullable()); // required map, no null wrapper
+                    const auto& column_map = assert_cast<const ColumnMap&>(column);
+                    const auto& end_pos = column_map.get_offsets();
+                    ASSERT_EQ(end_pos.size(), ROW_COUNT);
+                    EXPECT_EQ(end_pos[0], 0);
+                    EXPECT_EQ(end_pos[1], 2);
+                    EXPECT_EQ(end_pos[2], 3);
+                    EXPECT_EQ(end_pos[3], 3);
+                    EXPECT_EQ(end_pos[4], 4);
+                    const auto& mapped =
+                            assert_cast<const ColumnNullable&>(column_map.get_values());
+                    ASSERT_EQ(mapped.size(), 4); // equals the last offset
+                    EXPECT_TRUE(mapped.is_null_at(0));
+                    EXPECT_FALSE(mapped.is_null_at(1));
+                    EXPECT_TRUE(mapped.is_null_at(3));
+                });
+        auto map_struct_type = arrow::struct_({
+                arrow::field("a", arrow::int32(), false),
+                arrow::field("b", arrow::utf8(), true),
+        });
+        add_field(arrow::field(
+                          "nullable_map_int_struct_col",
+                          arrow::map(arrow::int32(), arrow::field("value", map_struct_type, true)),
+                          true),
+                  build_nullable_int_struct_map_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_result = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_result.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_result.is_null_at(0));
+                      EXPECT_TRUE(nullable_result.is_null_at(1)); // row 1 is a NULL map
+                      EXPECT_FALSE(nullable_result.is_null_at(2));
+                      EXPECT_FALSE(nullable_result.is_null_at(3));
+                      EXPECT_FALSE(nullable_result.is_null_at(4));
+
+                      const auto& column_map =
+                              assert_cast<const ColumnMap&>(nullable_result.get_nested_column());
+                      const auto& end_pos = column_map.get_offsets();
+                      ASSERT_EQ(end_pos.size(), ROW_COUNT);
+                      EXPECT_EQ(end_pos[0], 2);
+                      EXPECT_EQ(end_pos[1], 2);
+                      EXPECT_EQ(end_pos[2], 2);
+                      EXPECT_EQ(end_pos[3], 3);
+                      EXPECT_EQ(end_pos[4], 4);
+
+                      const auto& keys =
+                              get_nullable_nested_column<ColumnInt32>(column_map.get_keys());
+                      const auto& mapped =
+                              assert_cast<const ColumnNullable&>(column_map.get_values());
+                      const auto& struct_fields =
+                              assert_cast<const ColumnStruct&>(mapped.get_nested_column());
+                      const auto& a_column =
+                              get_nullable_nested_column<ColumnInt32>(struct_fields.get_column(0));
+                      const auto& b_column =
+                              assert_cast<const ColumnNullable&>(struct_fields.get_column(1));
+                      const auto& b_text =
+                              assert_cast<const ColumnString&>(b_column.get_nested_column());
+                      ASSERT_EQ(keys.size(), 4); // equals the last offset
+                      ASSERT_EQ(mapped.size(), 4);
+                      EXPECT_EQ(keys.get_element(0), 101);
+                      EXPECT_EQ(keys.get_element(1), 102);
+                      EXPECT_EQ(keys.get_element(3), 104);
+                      EXPECT_FALSE(mapped.is_null_at(0));
+                      EXPECT_FALSE(mapped.is_null_at(1));
+                      EXPECT_TRUE(mapped.is_null_at(2)); // NULL struct value
+                      EXPECT_FALSE(mapped.is_null_at(3));
+                      EXPECT_EQ(a_column.get_element(0), 21);
+                      EXPECT_EQ(a_column.get_element(1), 22);
+                      EXPECT_EQ(a_column.get_element(3), 24);
+                      EXPECT_EQ(b_text.get_data_at(0).to_string(), "ma");
+                      EXPECT_TRUE(b_column.is_null_at(1)); // field b is NULL in a non-NULL struct
+                      EXPECT_EQ(b_text.get_data_at(3).to_string(), "me");
+                  });
+        auto map_list_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        add_field(
+                arrow::field("nullable_map_int_list_col",
+                             arrow::map(arrow::int32(), arrow::field("value", map_list_type, true)),
+                             true),
+                build_nullable_int_list_map_array(),
+                [](const ParquetColumnSchema& schema, const IColumn& column) {
+                    EXPECT_TRUE(schema.type->is_nullable());
+                    const auto& nullable_result = assert_cast<const ColumnNullable&>(column);
+                    ASSERT_EQ(nullable_result.size(), ROW_COUNT);
+                    EXPECT_FALSE(nullable_result.is_null_at(0));
+                    EXPECT_TRUE(nullable_result.is_null_at(1)); // row 1 is a NULL map
+                    EXPECT_FALSE(nullable_result.is_null_at(2));
+                    EXPECT_FALSE(nullable_result.is_null_at(3));
+                    EXPECT_FALSE(nullable_result.is_null_at(4));
+
+                    const auto& column_map =
+                            assert_cast<const ColumnMap&>(nullable_result.get_nested_column());
+                    const auto& map_end_pos = column_map.get_offsets();
+                    ASSERT_EQ(map_end_pos.size(), ROW_COUNT);
+                    EXPECT_EQ(map_end_pos[0], 2);
+                    EXPECT_EQ(map_end_pos[1], 2);
+                    EXPECT_EQ(map_end_pos[2], 2);
+                    EXPECT_EQ(map_end_pos[3], 4);
+                    EXPECT_EQ(map_end_pos[4], 5);
+
+                    const auto& keys =
+                            get_nullable_nested_column<ColumnInt32>(column_map.get_keys());
+                    ASSERT_EQ(keys.size(), 5); // equals the last map offset
+                    EXPECT_EQ(keys.get_element(0), 201);
+                    EXPECT_EQ(keys.get_element(1), 202);
+                    EXPECT_EQ(keys.get_element(2), 203);
+                    EXPECT_EQ(keys.get_element(3), 204);
+                    EXPECT_EQ(keys.get_element(4), 205);
+
+                    const auto& mapped =
+                            assert_cast<const ColumnNullable&>(column_map.get_values());
+                    ASSERT_EQ(mapped.size(), 5);
+                    EXPECT_FALSE(mapped.is_null_at(0));
+                    EXPECT_FALSE(mapped.is_null_at(1));
+                    EXPECT_TRUE(mapped.is_null_at(2)); // NULL list value
+                    EXPECT_FALSE(mapped.is_null_at(3));
+                    EXPECT_FALSE(mapped.is_null_at(4));
+
+                    const auto& list_nested =
+                            assert_cast<const ColumnArray&>(mapped.get_nested_column());
+                    const auto& list_end_pos = list_nested.get_offsets();
+                    ASSERT_EQ(list_end_pos.size(), 5); // one offset per map entry
+                    EXPECT_EQ(list_end_pos[0], 2);
+                    EXPECT_EQ(list_end_pos[1], 2);
+                    EXPECT_EQ(list_end_pos[2], 2);
+                    EXPECT_EQ(list_end_pos[3], 4);
+                    EXPECT_EQ(list_end_pos[4], 5);
+
+                    const auto& elem_col =
+                            assert_cast<const ColumnNullable&>(list_nested.get_data());
+                    const auto& element_nested =
+                            assert_cast<const ColumnInt32&>(elem_col.get_nested_column());
+                    ASSERT_EQ(elem_col.size(), 5);
+                    EXPECT_EQ(element_nested.get_element(0), 1);
+                    EXPECT_EQ(element_nested.get_element(1), 2);
+                    EXPECT_TRUE(elem_col.is_null_at(2)); // NULL list element
+                    EXPECT_EQ(element_nested.get_element(3), 3);
+                    EXPECT_EQ(element_nested.get_element(4), 4);
+                });
+        auto list_map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        add_field(arrow::field("nullable_list_map_int_string_col",
+                               arrow::list(arrow::field("element", list_map_type, true)), true),
+                  build_nullable_map_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_result = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_result.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_result.is_null_at(0));
+                      EXPECT_TRUE(nullable_result.is_null_at(1)); // row 1 is a NULL list
+                      EXPECT_FALSE(nullable_result.is_null_at(2));
+                      EXPECT_FALSE(nullable_result.is_null_at(3));
+                      EXPECT_FALSE(nullable_result.is_null_at(4));
+
+                      const auto& outer_lists =
+                              assert_cast<const ColumnArray&>(nullable_result.get_nested_column());
+                      const auto& outer_end_pos = outer_lists.get_offsets();
+                      ASSERT_EQ(outer_end_pos.size(), ROW_COUNT);
+                      EXPECT_EQ(outer_end_pos[0], 2);
+                      EXPECT_EQ(outer_end_pos[1], 2);
+                      EXPECT_EQ(outer_end_pos[2], 2);
+                      EXPECT_EQ(outer_end_pos[3], 4);
+                      EXPECT_EQ(outer_end_pos[4], 5);
+
+                      const auto& map_holder =
+                              assert_cast<const ColumnNullable&>(outer_lists.get_data());
+                      ASSERT_EQ(map_holder.size(), 5); // same as the last outer offset
+                      EXPECT_FALSE(map_holder.is_null_at(0));
+                      EXPECT_FALSE(map_holder.is_null_at(1));
+                      EXPECT_TRUE(map_holder.is_null_at(2)); // NULL map element
+                      EXPECT_FALSE(map_holder.is_null_at(3));
+                      EXPECT_FALSE(map_holder.is_null_at(4));
+
+                      const auto& column_map =
+                              assert_cast<const ColumnMap&>(map_holder.get_nested_column());
+                      const auto& map_end_pos = column_map.get_offsets();
+                      ASSERT_EQ(map_end_pos.size(), 5);
+                      EXPECT_EQ(map_end_pos[0], 2);
+                      EXPECT_EQ(map_end_pos[1], 2);
+                      EXPECT_EQ(map_end_pos[2], 2);
+                      EXPECT_EQ(map_end_pos[3], 3);
+                      EXPECT_EQ(map_end_pos[4], 4);
+                      const auto& keys =
+                              get_nullable_nested_column<ColumnInt32>(column_map.get_keys());
+                      const auto& mapped =
+                              assert_cast<const ColumnNullable&>(column_map.get_values());
+                      const auto& value_text =
+                              assert_cast<const ColumnString&>(mapped.get_nested_column());
+                      ASSERT_EQ(keys.size(), 4); // same as the last map offset
+                      EXPECT_EQ(keys.get_element(0), 1);
+                      EXPECT_EQ(keys.get_element(1), 2);
+                      EXPECT_EQ(keys.get_element(2), 3);
+                      EXPECT_EQ(keys.get_element(3), 4);
+                      EXPECT_EQ(value_text.get_data_at(0).to_string(), "a");
+                      EXPECT_TRUE(mapped.is_null_at(1)); // NULL string value
+                      EXPECT_EQ(value_text.get_data_at(2).to_string(), "c");
+                      EXPECT_EQ(value_text.get_data_at(3).to_string(), "d");
+                  });
+        auto nested_map_type =
+                arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        add_field(arrow::field(
+                          "nullable_map_int_map_int_string_col",
+                          arrow::map(arrow::int32(), arrow::field("value", nested_map_type, true)),
+                          true),
+                  build_nullable_int_map_map_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_result = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_result.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_result.is_null_at(0));
+                      EXPECT_TRUE(nullable_result.is_null_at(1)); // row 1 is a NULL outer map
+                      EXPECT_FALSE(nullable_result.is_null_at(2));
+                      EXPECT_FALSE(nullable_result.is_null_at(3));
+                      EXPECT_FALSE(nullable_result.is_null_at(4));
+
+                      const auto& map_outer =
+                              assert_cast<const ColumnMap&>(nullable_result.get_nested_column());
+                      const auto& outer_end_pos = map_outer.get_offsets();
+                      ASSERT_EQ(outer_end_pos.size(), ROW_COUNT);
+                      EXPECT_EQ(outer_end_pos[0], 2);
+                      EXPECT_EQ(outer_end_pos[1], 2);
+                      EXPECT_EQ(outer_end_pos[2], 2);
+                      EXPECT_EQ(outer_end_pos[3], 4);
+                      EXPECT_EQ(outer_end_pos[4], 4);
+
+                      const auto& keys_outer =
+                              get_nullable_nested_column<ColumnInt32>(map_outer.get_keys());
+                      ASSERT_EQ(keys_outer.size(), 4); // same as the last outer offset
+                      EXPECT_EQ(keys_outer.get_element(0), 10);
+                      EXPECT_EQ(keys_outer.get_element(1), 20);
+                      EXPECT_EQ(keys_outer.get_element(2), 30);
+                      EXPECT_EQ(keys_outer.get_element(3), 40);
+
+                      const auto& inner_holder =
+                              assert_cast<const ColumnNullable&>(map_outer.get_values());
+                      ASSERT_EQ(inner_holder.size(), 4);
+                      EXPECT_FALSE(inner_holder.is_null_at(0));
+                      EXPECT_FALSE(inner_holder.is_null_at(1));
+                      EXPECT_TRUE(inner_holder.is_null_at(2)); // NULL inner map
+                      EXPECT_FALSE(inner_holder.is_null_at(3));
+
+                      const auto& map_inner =
+                              assert_cast<const ColumnMap&>(inner_holder.get_nested_column());
+                      const auto& inner_end_pos = map_inner.get_offsets();
+                      ASSERT_EQ(inner_end_pos.size(), 4);
+                      EXPECT_EQ(inner_end_pos[0], 1);
+                      EXPECT_EQ(inner_end_pos[1], 1);
+                      EXPECT_EQ(inner_end_pos[2], 1);
+                      EXPECT_EQ(inner_end_pos[3], 2);
+                      const auto& keys_inner =
+                              get_nullable_nested_column<ColumnInt32>(map_inner.get_keys());
+                      const auto& inner_str_col =
+                              assert_cast<const ColumnNullable&>(map_inner.get_values());
+                      const auto& inner_string_text =
+                              assert_cast<const ColumnString&>(inner_str_col.get_nested_column());
+                      ASSERT_EQ(keys_inner.size(), 2); // same as the last inner offset
+                      EXPECT_EQ(keys_inner.get_element(0), 101);
+                      EXPECT_EQ(keys_inner.get_element(1), 401);
+                      EXPECT_EQ(inner_string_text.get_data_at(0).to_string(), "aa");
+                      EXPECT_TRUE(inner_str_col.is_null_at(1)); // NULL string value
+                  });
+        auto deep_list_value_type = arrow::list(arrow::field("element", arrow::int32(), true));
+        auto deep_list_map_type =
+                arrow::map(arrow::int32(), arrow::field("value", deep_list_value_type, true));
+        auto deep_list_struct_type = arrow::struct_({arrow::field("kv", deep_list_map_type, true)});
+        add_field(arrow::field("nullable_list_struct_map_list_col",
+                               arrow::list(arrow::field("element", deep_list_struct_type, true)),
+                               true),
+                  build_deep_list_struct_map_list_array(),
+                  [](const ParquetColumnSchema& schema, const IColumn& column) {
+                      EXPECT_TRUE(schema.type->is_nullable());
+                      const auto& nullable_result = assert_cast<const ColumnNullable&>(column);
+                      ASSERT_EQ(nullable_result.size(), ROW_COUNT);
+                      EXPECT_FALSE(nullable_result.is_null_at(0));
+                      EXPECT_TRUE(nullable_result.is_null_at(1)); // row 1 is a NULL list
+                      EXPECT_FALSE(nullable_result.is_null_at(2));
+                      EXPECT_FALSE(nullable_result.is_null_at(3));
+                      EXPECT_FALSE(nullable_result.is_null_at(4));
+
+                      const auto& outer_lists =
+                              assert_cast<const ColumnArray&>(nullable_result.get_nested_column());
+                      const auto& outer_end_pos = outer_lists.get_offsets();
+                      ASSERT_EQ(outer_end_pos.size(), ROW_COUNT);
+                      EXPECT_EQ(outer_end_pos[0], 2);
+                      EXPECT_EQ(outer_end_pos[1], 2);
+                      EXPECT_EQ(outer_end_pos[2], 2);
+                      EXPECT_EQ(outer_end_pos[3], 4);
+                      EXPECT_EQ(outer_end_pos[4], 5);
+
+                      const auto& struct_holder =
+                              assert_cast<const ColumnNullable&>(outer_lists.get_data());
+                      ASSERT_EQ(struct_holder.size(), 5); // same as the last outer offset
+                      EXPECT_FALSE(struct_holder.is_null_at(0));
+                      EXPECT_TRUE(struct_holder.is_null_at(1)); // NULL struct element
+                      EXPECT_FALSE(struct_holder.is_null_at(2));
+                      EXPECT_FALSE(struct_holder.is_null_at(3));
+                      EXPECT_FALSE(struct_holder.is_null_at(4));
+
+                      const auto& struct_fields =
+                              assert_cast<const ColumnStruct&>(struct_holder.get_nested_column());
+                      const auto& map_holder =
+                              assert_cast<const ColumnNullable&>(struct_fields.get_column(0));
+                      ASSERT_EQ(map_holder.size(), 5);
+                      EXPECT_FALSE(map_holder.is_null_at(0));
+                      EXPECT_TRUE(map_holder.is_null_at(1)); // struct 1 itself is NULL
+                      EXPECT_TRUE(map_holder.is_null_at(2)); // NULL kv in a non-NULL struct
+                      EXPECT_FALSE(map_holder.is_null_at(3));
+                      EXPECT_FALSE(map_holder.is_null_at(4));
+
+                      const auto& column_map =
+                              assert_cast<const ColumnMap&>(map_holder.get_nested_column());
+                      const auto& map_end_pos = column_map.get_offsets();
+                      ASSERT_EQ(map_end_pos.size(), 5);
+                      EXPECT_EQ(map_end_pos[0], 2);
+                      EXPECT_EQ(map_end_pos[1], 2);
+                      EXPECT_EQ(map_end_pos[2], 2);
+                      EXPECT_EQ(map_end_pos[3], 2);
+                      EXPECT_EQ(map_end_pos[4], 4);
+                      const auto& keys =
+                              get_nullable_nested_column<ColumnInt32>(column_map.get_keys());
+                      ASSERT_EQ(keys.size(), 4); // same as the last map offset
+                      EXPECT_EQ(keys.get_element(0), 1);
+                      EXPECT_EQ(keys.get_element(1), 2);
+                      EXPECT_EQ(keys.get_element(2), 3);
+                      EXPECT_EQ(keys.get_element(3), 4);
+
+                      const auto& lists =
+                              assert_cast<const ColumnNullable&>(column_map.get_values());
+                      ASSERT_EQ(lists.size(), 4);
+                      EXPECT_FALSE(lists.is_null_at(0));
+                      EXPECT_FALSE(lists.is_null_at(1));
+                      EXPECT_TRUE(lists.is_null_at(2)); // NULL list value
+                      EXPECT_FALSE(lists.is_null_at(3));
+                      const auto& list_nested =
+                              assert_cast<const ColumnArray&>(lists.get_nested_column());
+                      const auto& list_end_pos = list_nested.get_offsets();
+                      ASSERT_EQ(list_end_pos.size(), 4);
+                      EXPECT_EQ(list_end_pos[0], 2);
+                      EXPECT_EQ(list_end_pos[1], 2);
+                      EXPECT_EQ(list_end_pos[2], 2);
+                      EXPECT_EQ(list_end_pos[3], 3);
+                      const auto& elem_col =
+                              assert_cast<const ColumnNullable&>(list_nested.get_data());
+                      const auto& element_nested =
+                              assert_cast<const ColumnInt32&>(elem_col.get_nested_column());
+                      ASSERT_EQ(elem_col.size(), 3); // same as the last list offset
+                      EXPECT_EQ(element_nested.get_element(0), 10);
+                      EXPECT_TRUE(elem_col.is_null_at(1));
+                      EXPECT_EQ(element_nested.get_element(2), 40);
+                  });
+        auto deep_map_nested_map_type =
+                arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+        auto deep_map_list_type =
+                arrow::list(arrow::field("element", deep_map_nested_map_type, true));
+        add_field(
+                arrow::field(
+                        "nullable_map_int_list_map_int_string_col",
+                        arrow::map(arrow::int32(), arrow::field("value", deep_map_list_type, true)),
+                        true),
+                build_deep_map_list_map_array(),
+                [](const ParquetColumnSchema& schema, const IColumn& column) {
+                    EXPECT_TRUE(schema.type->is_nullable());
+                    const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                    ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                    EXPECT_FALSE(nullable_column.is_null_at(0));
+                    EXPECT_TRUE(nullable_column.is_null_at(1));
+                    EXPECT_FALSE(nullable_column.is_null_at(2));
+                    EXPECT_FALSE(nullable_column.is_null_at(3));
+                    EXPECT_FALSE(nullable_column.is_null_at(4));
+
+                    const auto& outer_map =
+                            assert_cast<const ColumnMap&>(nullable_column.get_nested_column());
+                    const auto& outer_offsets = outer_map.get_offsets();
+                    ASSERT_EQ(outer_offsets.size(), ROW_COUNT);
+                    EXPECT_EQ(outer_offsets[0], 2);
+                    EXPECT_EQ(outer_offsets[1], 2);
+                    EXPECT_EQ(outer_offsets[2], 2);
+                    EXPECT_EQ(outer_offsets[3], 4);
+                    EXPECT_EQ(outer_offsets[4], 5);
+                    const auto& outer_keys =
+                            get_nullable_nested_column<ColumnInt32>(outer_map.get_keys());
+                    ASSERT_EQ(outer_keys.size(), 5);
+                    EXPECT_EQ(outer_keys.get_element(0), 10);
+                    EXPECT_EQ(outer_keys.get_element(1), 20);
+                    EXPECT_EQ(outer_keys.get_element(2), 30);
+                    EXPECT_EQ(outer_keys.get_element(3), 40);
+                    EXPECT_EQ(outer_keys.get_element(4), 50);
+
+                    const auto& lists = assert_cast<const ColumnNullable&>(outer_map.get_values());
+                    ASSERT_EQ(lists.size(), 5);
+                    EXPECT_FALSE(lists.is_null_at(0));
+                    EXPECT_FALSE(lists.is_null_at(1));
+                    EXPECT_TRUE(lists.is_null_at(2));
+                    EXPECT_FALSE(lists.is_null_at(3));
+                    EXPECT_FALSE(lists.is_null_at(4));
+                    const auto& list_column =
+                            assert_cast<const ColumnArray&>(lists.get_nested_column());
+                    const auto& list_offsets = list_column.get_offsets();
+                    ASSERT_EQ(list_offsets.size(), 5);
+                    EXPECT_EQ(list_offsets[0], 3);
+                    EXPECT_EQ(list_offsets[1], 3);
+                    EXPECT_EQ(list_offsets[2], 3);
+                    EXPECT_EQ(list_offsets[3], 4);
+                    EXPECT_EQ(list_offsets[4], 6);
+
+                    const auto& inner_maps =
+                            assert_cast<const ColumnNullable&>(list_column.get_data());
+                    ASSERT_EQ(inner_maps.size(), 6);
+                    EXPECT_FALSE(inner_maps.is_null_at(0));
+                    EXPECT_FALSE(inner_maps.is_null_at(1));
+                    EXPECT_TRUE(inner_maps.is_null_at(2));
+                    EXPECT_FALSE(inner_maps.is_null_at(3));
+                    EXPECT_TRUE(inner_maps.is_null_at(4));
+                    EXPECT_FALSE(inner_maps.is_null_at(5));
+                    const auto& inner_map_column =
+                            assert_cast<const ColumnMap&>(inner_maps.get_nested_column());
+                    const auto& inner_offsets = inner_map_column.get_offsets();
+                    ASSERT_EQ(inner_offsets.size(), 6);
+                    EXPECT_EQ(inner_offsets[0], 2);
+                    EXPECT_EQ(inner_offsets[1], 2);
+                    EXPECT_EQ(inner_offsets[2], 2);
+                    EXPECT_EQ(inner_offsets[3], 3);
+                    EXPECT_EQ(inner_offsets[4], 3);
+                    EXPECT_EQ(inner_offsets[5], 4);
+                    const auto& inner_keys =
+                            get_nullable_nested_column<ColumnInt32>(inner_map_column.get_keys());
+                    ASSERT_EQ(inner_keys.size(), 4);
+                    EXPECT_EQ(inner_keys.get_element(0), 1);
+                    EXPECT_EQ(inner_keys.get_element(1), 2);
+                    EXPECT_EQ(inner_keys.get_element(2), 3);
+                    EXPECT_EQ(inner_keys.get_element(3), 4);
+                    const auto& strings =
+                            assert_cast<const ColumnNullable&>(inner_map_column.get_values());
+                    const auto& string_data =
+                            assert_cast<const ColumnString&>(strings.get_nested_column());
+                    ASSERT_EQ(strings.size(), 4);
+                    EXPECT_EQ(string_data.get_data_at(0).to_string(), "a");
+                    EXPECT_TRUE(strings.is_null_at(1));
+                    EXPECT_EQ(string_data.get_data_at(2).to_string(), "c");
+                    EXPECT_EQ(string_data.get_data_at(3).to_string(), "d");
+                });
+
+        auto schema = arrow::schema(_arrow_fields);
+        auto table = arrow::Table::Make(schema, _arrays);
+
+        auto file_result = arrow::io::FileOutputStream::Open(_file_path);
+        ASSERT_TRUE(file_result.ok()) << file_result.status();
+        std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+        ::parquet::WriterProperties::Builder builder;
+        builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+        builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+        builder.compression(::parquet::Compression::UNCOMPRESSED);
+        PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                          ROW_COUNT, builder.build()));
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_reader(size_t field_idx) const {  // no projection
+        if (field_idx >= _fields.size()) return nullptr;  // miss from find_field_idx()
+        ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+        std::unique_ptr<ParquetColumnReader> reader;
+        if (auto st = factory.create(*_fields[field_idx], &reader); !st.ok()) ADD_FAILURE() << st;
+        return reader;  // callers must null-check: failures above are recorded but non-fatal
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_projected_child_reader(size_t field_idx,
+                                                                       size_t child_idx) const {
+        const auto& struct_schema = *_fields[field_idx];  // field_idx itself is not range-checked
+        EXPECT_LT(child_idx, struct_schema.children.size());
+        if (child_idx >= struct_schema.children.size()) return nullptr;  // EXPECT_LT is non-fatal
+        format::LocalColumnIndex projection;
+        projection.index = struct_schema.local_id;
+        projection.project_all_children = false;  // keep only the child pushed below
+        format::LocalColumnIndex child_projection;
+        child_projection.index = struct_schema.children[child_idx]->local_id;
+        projection.children.push_back(std::move(child_projection));
+
+        ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+        std::unique_ptr<ParquetColumnReader> reader;
+        auto st = factory.create(struct_schema, &projection, &reader);
+        EXPECT_TRUE(st.ok()) << st;
+        return reader;  // may be null when creation failed; the failure is already recorded
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_projected_grandchild_reader(
+            size_t field_idx, size_t child_idx, size_t grandchild_idx) const {
+        const auto& struct_schema = *_fields[field_idx];  // field_idx itself is not range-checked
+        EXPECT_LT(child_idx, struct_schema.children.size());
+        if (child_idx >= struct_schema.children.size()) return nullptr;  // EXPECT_LT is non-fatal
+        const auto& child_schema = *struct_schema.children[child_idx];
+        EXPECT_LT(grandchild_idx, child_schema.children.size());
+        if (grandchild_idx >= child_schema.children.size()) return nullptr;  // same reason
+        format::LocalColumnIndex projection;  // root -> one child -> one grandchild
+        projection.index = struct_schema.local_id;
+        projection.project_all_children = false;
+        format::LocalColumnIndex child_projection;
+        child_projection.index = child_schema.local_id;
+        child_projection.project_all_children = false;
+        format::LocalColumnIndex grandchild_projection;
+        grandchild_projection.index = child_schema.children[grandchild_idx]->local_id;
+        child_projection.children.push_back(std::move(grandchild_projection));
+        projection.children.push_back(std::move(child_projection));
+        ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+        std::unique_ptr<ParquetColumnReader> reader;
+        auto st = factory.create(struct_schema, &projection, &reader);
+        EXPECT_TRUE(st.ok()) << st;
+        return reader;  // may be null when creation failed; the failure is already recorded
+    }
+
+    void read_and_validate(size_t field_idx) const {  // read all rows, then run the validator
+        auto column_reader = create_reader(field_idx);
+        ASSERT_NE(column_reader, nullptr);
+        MutableColumnPtr output = column_reader->type()->create_column();
+        int64_t num_rows_read = 0;
+        auto read_status = column_reader->read(ROW_COUNT, output, &num_rows_read);
+        ASSERT_TRUE(read_status.ok()) << read_status;
+        ASSERT_EQ(num_rows_read, ROW_COUNT);
+        ASSERT_EQ(output->size(), ROW_COUNT);
+        _expected_by_field[field_idx](*_fields[field_idx], *output);  // per-field expectations
+    }
+
+    size_t find_field_idx(const std::string& name) const {
+        // Linear search by name; a miss is reported and signalled by returning _fields.size().
+        size_t pos = 0;
+        while (pos < _fields.size() && _fields[pos]->name != name) {
+            ++pos;
+        }
+        if (pos == _fields.size()) ADD_FAILURE() << "Cannot find parquet test field " << name;
+        return pos;
+    }
+
+    std::filesystem::path _test_dir;  // NOTE(review): presumably the scratch dir of _file_path
+    std::string _file_path;  // destination of the parquet file written by the fixture
+    std::unique_ptr<::parquet::ParquetFileReader> _file_reader;  // source of metadata()
+    std::shared_ptr<::parquet::RowGroupReader> _row_group;  // handed to the reader factory
+    std::vector<std::unique_ptr<ParquetColumnSchema>> _fields;  // schemas, indexed by field_idx
+    std::vector<std::shared_ptr<arrow::Field>> _arrow_fields;  // fields of the written schema
+    std::vector<std::shared_ptr<arrow::Array>> _arrays;  // column data of the written table
+    std::vector<std::function<void(const ParquetColumnSchema&, const IColumn&)>> _expected_by_field;  // validators run by read_and_validate()
+};
+
+TEST(ParquetColumnReaderBaseTest, SelectionVectorRangesAndValidation) {
+    SelectionVector identity_selection;
+    ASSERT_TRUE(identity_selection.verify(4, 5).ok());
+    auto row_ranges = selection_to_ranges(identity_selection, 4);
+    ASSERT_EQ(row_ranges.size(), 1);
+    EXPECT_EQ(row_ranges[0].start, 0);
+    EXPECT_EQ(row_ranges[0].length, 4);
+    // Externally backed indices: the first three are expected to form ranges {0,1} and {2,2}.
+    std::array<SelectionVector::Index, 5> row_indices = {0, 2, 3, 6, 6};
+    SelectionVector external_selection(row_indices.data(), 4);
+    auto verify_status = external_selection.verify(3, 7);
+    ASSERT_TRUE(verify_status.ok()) << verify_status;
+    row_ranges = selection_to_ranges(external_selection, 3);
+    ASSERT_EQ(row_ranges.size(), 2);
+    EXPECT_EQ(row_ranges[0].start, 0);
+    EXPECT_EQ(row_ranges[0].length, 1);
+    EXPECT_EQ(row_ranges[1].start, 2);
+    EXPECT_EQ(row_ranges[1].length, 2);
+    // Argument combinations that verify() is expected to reject for the same selection.
+    EXPECT_FALSE(external_selection.verify(8, 7).ok());
+    EXPECT_FALSE(external_selection.verify(5, 7).ok());
+    EXPECT_FALSE(external_selection.verify(4, 6).ok());
+    // A repeated index must fail verification, as must a negative second argument.
+    std::array<SelectionVector::Index, 3> repeated_indices = {0, 2, 2};
+    SelectionVector repeated_selection(repeated_indices.data(), repeated_indices.size());
+    EXPECT_FALSE(repeated_selection.verify(3, 5).ok());
+    EXPECT_FALSE(identity_selection.verify(1, -1).ok());
+}
+
+TEST(ParquetColumnReaderBaseTest, DefaultSelectUsesSkipReadRangesAndSkipNestedUsesBuild) {
+    DefaultSelectReader select_reader;
+    std::array<SelectionVector::Index, 3> picked_rows = {1, 3, 4};
+    SelectionVector picked(picked_rows.data(), picked_rows.size());
+    auto int_column = ColumnInt32::create();
+    MutableColumnPtr output = std::move(int_column);
+    auto select_status = select_reader.select(picked, picked_rows.size(), 6, output);
+    ASSERT_TRUE(select_status.ok()) << select_status;
+    // Selecting rows {1, 3, 4} of 6 is expected to yield skips {1, 1, 1} and reads {1, 2}.
+    const auto& selected_values = assert_cast<const ColumnInt32&>(*output);
+    ASSERT_EQ(selected_values.size(), 3);
+    EXPECT_EQ(selected_values.get_element(0), 1);
+    EXPECT_EQ(selected_values.get_element(1), 3);
+    EXPECT_EQ(selected_values.get_element(2), 4);
+    EXPECT_EQ(select_reader.skip_ranges(), std::vector<int64_t>({1, 1, 1}));
+    EXPECT_EQ(select_reader.read_ranges(), std::vector<int64_t>({1, 2}));
+    // A reader without overrides must report skip and the nested-batch calls as failures.
+    BaseUnsupportedReader unsupported_reader;
+    auto unsupported_skip = unsupported_reader.skip(1);
+    EXPECT_FALSE(unsupported_skip.ok());
+    EXPECT_NE(unsupported_skip.to_string().find("skip is not implemented"), std::string::npos);
+    EXPECT_FALSE(unsupported_reader.load_nested_batch(1).ok());
+    int64_t values_read = 0;
+    EXPECT_FALSE(unsupported_reader.build_nested_column(1, output, &values_read).ok());
+    // Skipping nested rows through NestedSkipReader must succeed.
+    NestedSkipReader nested_reader;
+    auto nested_skip = nested_reader.skip_nested_column(3);
+    ASSERT_TRUE(nested_skip.ok()) << nested_skip;
+}
+
+TEST_F(ParquetColumnReaderTest, ScalarReadCoversRequiredNullableAllNullAndMultipleBatches) {
+    read_and_validate(find_field_idx("int32_col"));
+    read_and_validate(find_field_idx("string_col"));
+    read_and_validate(find_field_idx("nullable_int_col"));
+    read_and_validate(find_field_idx("all_null_int_col"));
+    auto reader = create_reader(find_field_idx("int32_col"));  // second part: batched reads
+    ASSERT_NE(reader, nullptr);  // create_reader() only records a non-fatal failure
+    auto column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader->read(2, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 2);
+    ASSERT_TRUE(reader->read(3, column, &rows_read).ok());  // appends to the same column
+    ASSERT_EQ(rows_read, 3);
+    const auto& values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(values.size(), ROW_COUNT);
+    EXPECT_EQ(values.get_element(0), 10);
+    EXPECT_EQ(values.get_element(1), 20);
+    EXPECT_EQ(values.get_element(2), 30);
+    EXPECT_EQ(values.get_element(4), 50);
+}
+
+TEST_F(ParquetColumnReaderTest, ScalarSkipCoversZeroSomeAllAndNulls) {
+    auto reader = create_reader(find_field_idx("int32_col"));  // scenario: skip(0) is a no-op
+    ASSERT_NE(reader, nullptr);  // create_reader() only records a non-fatal failure
+    ASSERT_TRUE(reader->skip(0).ok());
+    auto column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader->read(1, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 1);  // cast at use below: `column` is replaced for every scenario
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(0), 10);
+
+    reader = create_reader(find_field_idx("int32_col"));  // scenario: skip part of the rows
+    ASSERT_NE(reader, nullptr);
+    ASSERT_TRUE(reader->skip(2).ok());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->read(2, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 2);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(0), 30);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(1), 40);
+
+    reader = create_reader(find_field_idx("int32_col"));  // scenario: skip every row
+    ASSERT_NE(reader, nullptr);
+    ASSERT_TRUE(reader->skip(ROW_COUNT).ok());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->read(1, column, &rows_read).ok());
+    EXPECT_EQ(rows_read, 0);
+    EXPECT_EQ(column->size(), 0);
+    reader = create_reader(find_field_idx("nullable_int_col"));  // scenario: nullable column
+    ASSERT_NE(reader, nullptr);
+    ASSERT_TRUE(reader->skip(1).ok());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->read(2, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 2);
+    ASSERT_EQ(column->size(), 2);
+    EXPECT_TRUE(assert_cast<const ColumnNullable&>(*column).is_null_at(0));
+    EXPECT_FALSE(assert_cast<const ColumnNullable&>(*column).is_null_at(1));
+}
+
+TEST_F(ParquetColumnReaderTest, ScalarSelectCoversAllDisjointSingleZeroThenReadAndNulls) {
+    auto reader = create_reader(find_field_idx("int32_col"));  // scenario: select every row
+    ASSERT_NE(reader, nullptr);  // create_reader() only records a non-fatal failure
+    SelectionVector all_selected(ROW_COUNT);
+    auto column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(all_selected, ROW_COUNT, ROW_COUNT, column).ok());
+    ASSERT_EQ(column->size(), ROW_COUNT);  // cast at use: `column` is replaced per scenario
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(0), 10);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(4), 50);
+
+    reader = create_reader(find_field_idx("int32_col"));  // scenario: disjoint rows
+    ASSERT_NE(reader, nullptr);
+    std::array<SelectionVector::Index, 3> disjoint = {0, 2, 4};
+    SelectionVector disjoint_selection(disjoint.data(), disjoint.size());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(disjoint_selection, disjoint.size(), ROW_COUNT, column).ok());
+    ASSERT_EQ(column->size(), 3);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(0), 10);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(1), 30);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(2), 50);
+
+    reader = create_reader(find_field_idx("int32_col"));  // scenario: a single row
+    ASSERT_NE(reader, nullptr);
+    std::array<SelectionVector::Index, 1> single = {2};
+    SelectionVector single_selection(single.data(), single.size());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(single_selection, single.size(), ROW_COUNT, column).ok());
+    ASSERT_EQ(column->size(), 1);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(0), 30);
+
+    reader = create_reader(find_field_idx("int32_col"));  // scenario: first and last row
+    ASSERT_NE(reader, nullptr);
+    std::array<SelectionVector::Index, 2> first_last = {0, 4};
+    SelectionVector first_last_selection(first_last.data(), first_last.size());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(first_last_selection, first_last.size(), ROW_COUNT, column).ok());
+    ASSERT_EQ(column->size(), 2);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(0), 10);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(1), 50);
+
+    reader = create_reader(find_field_idx("int32_col"));  // scenario: empty select, then read
+    ASSERT_NE(reader, nullptr);
+    SelectionVector empty_selection;
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(empty_selection, 0, 2, column).ok());
+    ASSERT_EQ(column->size(), 0);
+    int64_t rows_read = 0;
+    ASSERT_TRUE(reader->read(1, column, &rows_read).ok());
+    ASSERT_EQ(rows_read, 1);
+    ASSERT_EQ(column->size(), 1);
+    EXPECT_EQ(assert_cast<const ColumnInt32&>(*column).get_element(0), 30);
+
+    reader = create_reader(find_field_idx("nullable_int_col"));  // scenario: nullable column
+    ASSERT_NE(reader, nullptr);
+    std::array<SelectionVector::Index, 3> nullable_rows = {0, 1, 2};
+    SelectionVector nullable_selection(nullable_rows.data(), nullable_rows.size());
+    column = reader->type()->create_column();
+    ASSERT_TRUE(reader->select(nullable_selection, nullable_rows.size(), ROW_COUNT, column).ok());
+    ASSERT_EQ(column->size(), 3);
+    EXPECT_FALSE(assert_cast<const ColumnNullable&>(*column).is_null_at(0));
+    EXPECT_TRUE(assert_cast<const ColumnNullable&>(*column).is_null_at(1));
+    EXPECT_FALSE(assert_cast<const ColumnNullable&>(*column).is_null_at(2));
+}
+
+TEST_F(ParquetColumnReaderTest, FactoryRejectsInvalidScalarInputsAndNestedScalarProjection) {
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> created;
+    // Case 1: a leaf column id equal to the file's column count must be rejected.
+    const auto& int32_schema = *_fields[find_field_idx("int32_col")];
+    ParquetColumnSchema out_of_range_leaf;
+    out_of_range_leaf.kind = ParquetColumnSchemaKind::PRIMITIVE;
+    out_of_range_leaf.name = "invalid_leaf";
+    out_of_range_leaf.type = int32_schema.type;
+    out_of_range_leaf.type_descriptor = int32_schema.type_descriptor;
+    out_of_range_leaf.descriptor = int32_schema.descriptor;
+    out_of_range_leaf.leaf_column_id = _file_reader->metadata()->num_columns();
+    auto create_status = factory.create(out_of_range_leaf, &created);
+    EXPECT_FALSE(create_status.ok());
+    EXPECT_NE(create_status.to_string().find("Invalid parquet leaf column id"), std::string::npos);
+    // Case 2: a primitive schema whose descriptor was never assigned must be rejected.
+    ParquetColumnSchema missing_descriptor;
+    missing_descriptor.kind = ParquetColumnSchemaKind::PRIMITIVE;
+    missing_descriptor.name = "null_descriptor";
+    missing_descriptor.type = int32_schema.type;
+    missing_descriptor.type_descriptor = int32_schema.type_descriptor;
+    missing_descriptor.leaf_column_id = int32_schema.leaf_column_id;
+    create_status = factory.create(missing_descriptor, &created);
+    EXPECT_FALSE(create_status.ok());
+    EXPECT_NE(create_status.to_string().find("descriptor is null"), std::string::npos);
+    // Case 3: the element schema of a list column cannot be opened on its own.
+    const auto& list_element_schema =
+            *_fields[find_field_idx("nullable_list_int_col")]->children[0];
+    create_status = factory.create(list_element_schema, &created);
+    EXPECT_FALSE(create_status.ok());
+    EXPECT_NE(create_status.to_string().find("flat primitive columns"), std::string::npos);
+    // Case 4: a partial projection placed on the scalar list element must be rejected.
+    const auto& list_schema = *_fields[find_field_idx("nullable_list_int_col")];
+    format::LocalColumnIndex list_projection =
+            format::LocalColumnIndex::partial_local(list_schema.local_id);
+    format::LocalColumnIndex element_index =
+            format::LocalColumnIndex::partial_local(list_element_schema.local_id);
+    list_projection.children.push_back(std::move(element_index));
+    create_status = factory.create(list_schema, &list_projection, &created);
+    EXPECT_FALSE(create_status.ok());
+    EXPECT_NE(create_status.to_string().find("scalar projection is invalid"), std::string::npos);
+}
+
+TEST_F(ParquetColumnReaderTest, FactoryRejectsInvalidComplexProjections) {
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> created;
+    // Case 1: a partial struct projection without any child must be rejected.
+    const auto& struct_schema = *_fields[find_field_idx("struct_col")];
+    format::LocalColumnIndex childless_struct =
+            format::LocalColumnIndex::partial_local(struct_schema.local_id);
+    auto create_status = factory.create(struct_schema, &childless_struct, &created);
+    EXPECT_FALSE(create_status.ok());
+    EXPECT_NE(create_status.to_string().find("contains no children"), std::string::npos);
+    // Case 2: a struct projection naming a child id that does not exist must be rejected.
+    format::LocalColumnIndex unknown_struct_child =
+            format::LocalColumnIndex::partial_local(struct_schema.local_id);
+    unknown_struct_child.children.push_back(format::LocalColumnIndex::local(9999));
+    create_status = factory.create(struct_schema, &unknown_struct_child, &created);
+    EXPECT_FALSE(create_status.ok());
+    EXPECT_NE(create_status.to_string().find("contains invalid child"), std::string::npos);
+    // Case 3: a partial list projection without its element must be rejected.
+    const auto& list_schema = *_fields[find_field_idx("nullable_list_int_col")];
+    format::LocalColumnIndex elementless_list =
+            format::LocalColumnIndex::partial_local(list_schema.local_id);
+    create_status = factory.create(list_schema, &elementless_list, &created);
+    EXPECT_FALSE(create_status.ok());
+    EXPECT_NE(create_status.to_string().find("contains no element"), std::string::npos);
+    // Case 4: a map projection mixing the valid value child with an unknown id must be rejected.
+    const auto& map_schema = *_fields[find_field_idx("nullable_map_int_struct_col")];
+    const auto& map_value_schema = *map_schema.children[1];
+    format::LocalColumnIndex unknown_map_child =
+            format::LocalColumnIndex::partial_local(map_schema.local_id);
+    unknown_map_child.children.push_back(format::LocalColumnIndex::local(map_value_schema.local_id));
+    unknown_map_child.children.push_back(format::LocalColumnIndex::local(9999));
+    create_status = factory.create(map_schema, &unknown_map_child, &created);
+    EXPECT_FALSE(create_status.ok());
+    EXPECT_NE(create_status.to_string().find("contains invalid child"), std::string::npos);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadSupportedComplexTypes) {
+    // Each listed struct/list/map test column is read in full through read_and_validate(),
+    // which runs the validator registered for that field. A failure in one column does not
+    // stop the remaining columns from being checked.
+    static constexpr const char* kComplexFields[] = {
+            "struct_col", "nullable_struct_col",
+            "nullable_struct_decimal_col", "list_int_col",
+            "nullable_list_int_col", "required_nullable_list_int_col",
+            "nullable_list_struct_col", "nullable_list_list_int_col",
+            "map_int_string_col", "nullable_map_int_string_col",
+            "required_nullable_map_int_string_col", "nullable_map_int_struct_col",
+            "nullable_map_int_list_col", "nullable_list_map_int_string_col",
+            "nullable_map_int_map_int_string_col", "nullable_list_struct_map_list_col",
+            "nullable_map_int_list_map_int_string_col",
+    };
+    for (const char* field_name : kComplexFields) {
+        read_and_validate(find_field_idx(field_name));
+    }
+}
+
+TEST_F(ParquetColumnReaderTest, SkipThenRead) {
+    auto reader = create_reader(find_field_idx("int32_col"));
+    ASSERT_NE(reader, nullptr);  // create_reader() only records a non-fatal failure
+    auto st = reader->skip(2);
+    ASSERT_TRUE(st.ok()) << st;
+    MutableColumnPtr column = reader->type()->create_column();  // read resumes after the skip
+    int64_t rows_read = 0;
+    st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+
+    const auto& int_values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(int_values.size(), 2);
+    EXPECT_EQ(int_values.get_element(0), 30);
+    EXPECT_EQ(int_values.get_element(1), 40);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectReadsOnlySelectedRanges) {
+    auto reader = create_reader(find_field_idx("int32_col"));
+    ASSERT_NE(reader, nullptr);  // create_reader() only records a non-fatal failure
+    SelectionVector selection(3);  // picks rows 0, 2 and 4
+    selection.set_index(0, 0);
+    selection.set_index(1, 2);
+    selection.set_index(2, 4);
+    MutableColumnPtr column = reader->type()->create_column();
+    auto st = reader->select(selection, 3, ROW_COUNT, column);
+    ASSERT_TRUE(st.ok()) << st;
+
+    const auto& int_values = assert_cast<const ColumnInt32&>(*column);
+    ASSERT_EQ(int_values.size(), 3);
+    EXPECT_EQ(int_values.get_element(0), 10);
+    EXPECT_EQ(int_values.get_element(1), 30);
+    EXPECT_EQ(int_values.get_element(2), 50);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedStructChildren) {
+    const auto struct_idx = find_field_idx("struct_col");
+    ASSERT_LT(struct_idx, _fields.size());
+    const auto& schema = *_fields[struct_idx];
+    ASSERT_EQ(schema.name, "struct_col");
+    ASSERT_EQ(schema.children.size(), 2);
+    // Projection that keeps only the second child of the struct.
+    format::LocalColumnIndex second_child;
+    second_child.index = schema.children[1]->local_id;
+    format::LocalColumnIndex struct_projection;
+    struct_projection.index = schema.local_id;
+    struct_projection.project_all_children = false;
+    struct_projection.children.push_back(std::move(second_child));
+    // The projected reader type must be a struct with the single element "b".
+    std::unique_ptr<ParquetColumnReader> projected_reader;
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    auto status = factory.create(schema, &struct_projection, &projected_reader);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(remove_nullable(projected_reader->type())->get_primitive_type(), TYPE_STRUCT);
+    const auto* struct_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(projected_reader->type()).get());
+    ASSERT_EQ(struct_type->get_elements().size(), 1);
+    EXPECT_EQ(struct_type->get_element_name(0), "b");
+    // Reading all rows yields a one-column struct holding the projected strings.
+    MutableColumnPtr output = projected_reader->type()->create_column();
+    int64_t num_rows = 0;
+    status = projected_reader->read(ROW_COUNT, output, &num_rows);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(num_rows, ROW_COUNT);
+    const auto& struct_data = assert_cast<const ColumnStruct&>(*output);
+    ASSERT_EQ(struct_data.get_columns().size(), 1);
+    const auto& b_strings = get_nullable_nested_column<ColumnString>(struct_data.get_column(0));
+    EXPECT_EQ(b_strings.get_data_at(0).to_string(), "sa");
+    EXPECT_EQ(b_strings.get_data_at(4).to_string(), "se");
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedNullableStructChildren) {
+    const auto struct_idx = find_field_idx("nullable_struct_col");
+    ASSERT_LT(struct_idx, _fields.size());
+    const auto& schema = *_fields[struct_idx];
+    ASSERT_EQ(schema.name, "nullable_struct_col");
+    ASSERT_EQ(schema.children.size(), 2);
+    // Projection that keeps only the second child of the nullable struct.
+    format::LocalColumnIndex second_child;
+    second_child.index = schema.children[1]->local_id;
+    format::LocalColumnIndex struct_projection;
+    struct_projection.index = schema.local_id;
+    struct_projection.project_all_children = false;
+    struct_projection.children.push_back(std::move(second_child));
+    // The reader type must be nullable and wrap a struct with the single element "b".
+    std::unique_ptr<ParquetColumnReader> projected_reader;
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    auto status = factory.create(schema, &struct_projection, &projected_reader);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_TRUE(projected_reader->type()->is_nullable());
+    ASSERT_EQ(remove_nullable(projected_reader->type())->get_primitive_type(), TYPE_STRUCT);
+    const auto* struct_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(projected_reader->type()).get());
+    ASSERT_EQ(struct_type->get_elements().size(), 1);
+    EXPECT_EQ(struct_type->get_element_name(0), "b");
+    // Row-level nulls are checked on the outer column, child nulls on the projected strings.
+    MutableColumnPtr output = projected_reader->type()->create_column();
+    int64_t num_rows = 0;
+    status = projected_reader->read(ROW_COUNT, output, &num_rows);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(num_rows, ROW_COUNT);
+    const auto& outer = assert_cast<const ColumnNullable&>(*output);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    EXPECT_FALSE(outer.is_null_at(3));
+    EXPECT_TRUE(outer.is_null_at(4));
+    const auto& struct_data =
+            assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    ASSERT_EQ(struct_data.get_columns().size(), 1);
+    const auto& b_nullable = assert_cast<const ColumnNullable&>(struct_data.get_column(0));
+    const auto& b_strings = assert_cast<const ColumnString&>(b_nullable.get_nested_column());
+    EXPECT_FALSE(b_nullable.is_null_at(0));
+    EXPECT_TRUE(b_nullable.is_null_at(2));
+    EXPECT_FALSE(b_nullable.is_null_at(3));
+    EXPECT_EQ(b_strings.get_data_at(0).to_string(), "nsa");
+    EXPECT_EQ(b_strings.get_data_at(3).to_string(), "nsd");
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedListStructElementChildren) {
+    const auto list_idx = find_field_idx("nullable_list_struct_col");
+    ASSERT_LT(list_idx, _fields.size());
+    const auto& list_schema = *_fields[list_idx];
+    ASSERT_EQ(list_schema.name, "nullable_list_struct_col");
+    ASSERT_EQ(list_schema.children.size(), 1);
+    const auto& item_schema = *list_schema.children[0];
+    ASSERT_EQ(item_schema.children.size(), 2);
+    // Projection list -> element struct -> second struct child, built from the leaf upwards.
+    format::LocalColumnIndex second_child;
+    second_child.index = item_schema.children[1]->local_id;
+    format::LocalColumnIndex item_projection;
+    item_projection.index = item_schema.local_id;
+    item_projection.project_all_children = false;
+    item_projection.children.push_back(std::move(second_child));
+    format::LocalColumnIndex list_projection;
+    list_projection.index = list_schema.local_id;
+    list_projection.project_all_children = false;
+    list_projection.children.push_back(std::move(item_projection));
+    // The reader type must be a nullable array whose struct element only has "b".
+    std::unique_ptr<ParquetColumnReader> projected_reader;
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    auto status = factory.create(list_schema, &list_projection, &projected_reader);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_TRUE(projected_reader->type()->is_nullable());
+    const auto* list_type =
+            assert_cast<const DataTypeArray*>(remove_nullable(projected_reader->type()).get());
+    const auto* item_type = assert_cast<const DataTypeStruct*>(
+            remove_nullable(list_type->get_nested_type()).get());
+    ASSERT_EQ(item_type->get_elements().size(), 1);
+    EXPECT_EQ(item_type->get_element_name(0), "b");
+    // Read every row of the projected list column.
+    MutableColumnPtr output = projected_reader->type()->create_column();
+    int64_t num_rows = 0;
+    status = projected_reader->read(ROW_COUNT, output, &num_rows);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(num_rows, ROW_COUNT);
+    // Unwrap nullable(array(nullable(struct(nullable(string))))) and check the flattened items.
+    const auto& outer = assert_cast<const ColumnNullable&>(*output);
+    const auto& list_data = assert_cast<const ColumnArray&>(outer.get_nested_column());
+    const auto& items = assert_cast<const ColumnNullable&>(list_data.get_data());
+    const auto& item_structs = assert_cast<const ColumnStruct&>(items.get_nested_column());
+    ASSERT_EQ(item_structs.get_columns().size(), 1);
+    const auto& b_nullable = assert_cast<const ColumnNullable&>(item_structs.get_column(0));
+    const auto& b_strings = assert_cast<const ColumnString&>(b_nullable.get_nested_column());
+    ASSERT_EQ(items.size(), 5);
+    EXPECT_EQ(b_strings.get_data_at(0).to_string(), "la");
+    EXPECT_TRUE(b_nullable.is_null_at(1));
+    EXPECT_TRUE(items.is_null_at(2));
+    EXPECT_EQ(b_strings.get_data_at(3).to_string(), "ld");
+    EXPECT_EQ(b_strings.get_data_at(4).to_string(), "le");
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedMapStructValueChildren) {
+    // Projects only the second child of the map's struct value, then reads all rows.
+    const auto map_idx = find_field_idx("nullable_map_int_struct_col");
+    ASSERT_LT(map_idx, _fields.size());
+    const auto& map_field = *_fields[map_idx];
+    ASSERT_EQ(map_field.name, "nullable_map_int_struct_col");
+    ASSERT_EQ(map_field.children.size(), 2);
+    const auto& value_field = *map_field.children[1];
+    ASSERT_EQ(value_field.children.size(), 2);
+    // Assemble the projection bottom-up: struct member -> map value -> map root.
+    format::LocalColumnIndex member_index;
+    member_index.index = value_field.children[1]->local_id;
+    format::LocalColumnIndex value_index;
+    value_index.index = value_field.local_id;
+    value_index.project_all_children = false;
+    value_index.children.push_back(std::move(member_index));
+    format::LocalColumnIndex root_index;
+    root_index.index = map_field.local_id;
+    root_index.project_all_children = false;
+    root_index.children.push_back(std::move(value_index));
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    auto status = factory.create(map_field, &root_index, &reader);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_TRUE(reader->type()->is_nullable());
+    const auto* map_type = assert_cast<const DataTypeMap*>(remove_nullable(reader->type()).get());
+    EXPECT_EQ(remove_nullable(map_type->get_key_type())->get_primitive_type(), TYPE_INT);
+    const auto* val_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(map_type->get_value_type()).get());
+    ASSERT_EQ(val_type->get_elements().size(), 1);
+    EXPECT_EQ(val_type->get_element_name(0), "b");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    status = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+    // Keys are still materialized although only the value subtree was listed above.
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    const auto& maps = assert_cast<const ColumnMap&>(outer.get_nested_column());
+    const auto& key_ints = get_nullable_nested_column<ColumnInt32>(maps.get_keys());
+    const auto& value_cells = assert_cast<const ColumnNullable&>(maps.get_values());
+    const auto& value_struct = assert_cast<const ColumnStruct&>(value_cells.get_nested_column());
+    ASSERT_EQ(value_struct.get_columns().size(), 1);
+    const auto& b_nullable = assert_cast<const ColumnNullable&>(value_struct.get_column(0));
+    const auto& b_strings = assert_cast<const ColumnString&>(b_nullable.get_nested_column());
+    ASSERT_EQ(key_ints.size(), 4);
+    ASSERT_EQ(value_cells.size(), 4);
+    EXPECT_EQ(key_ints.get_element(0), 101);
+    EXPECT_EQ(key_ints.get_element(1), 102);
+    EXPECT_EQ(key_ints.get_element(3), 104);
+    EXPECT_EQ(b_strings.get_data_at(0).to_string(), "ma");
+    EXPECT_TRUE(b_nullable.is_null_at(1));
+    EXPECT_TRUE(value_cells.is_null_at(2));
+    EXPECT_EQ(b_strings.get_data_at(3).to_string(), "me");
+}
+
+TEST_F(ParquetColumnReaderTest, AllowsMapKeyWithValueProjection) {
+    // A partial map projection listing both the key and the value child must be accepted.
+    const auto map_idx = find_field_idx("nullable_map_int_struct_col");
+    ASSERT_LT(map_idx, _fields.size());
+    const auto& map_field = *_fields[map_idx];
+    ASSERT_EQ(map_field.children.size(), 2);
+    const auto& key_field = *map_field.children[0];
+    const auto& value_field = *map_field.children[1];
+
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    auto key_and_value = format::LocalColumnIndex::partial_local(map_field.local_id);
+    key_and_value.children.push_back(format::LocalColumnIndex::local(key_field.local_id));
+    key_and_value.children.push_back(format::LocalColumnIndex::local(value_field.local_id));
+    const auto status = factory.create(map_field, &key_and_value, &reader);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_NE(reader, nullptr);
+}
+
+TEST_F(ParquetColumnReaderTest, RejectMapKeyOnlyProjection) {
+    // A partial map projection listing only the key child must make reader creation fail.
+    const auto map_idx = find_field_idx("nullable_map_int_struct_col");
+    ASSERT_LT(map_idx, _fields.size());
+    const auto& map_field = *_fields[map_idx];
+    ASSERT_EQ(map_field.children.size(), 2);
+    const auto& key_field = *map_field.children[0];
+
+    ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+    std::unique_ptr<ParquetColumnReader> reader;
+    auto key_only = format::LocalColumnIndex::partial_local(map_field.local_id);
+    key_only.children.push_back(format::LocalColumnIndex::local(key_field.local_id));
+    const auto status = factory.create(map_field, &key_only, &reader);
+    ASSERT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no value"), std::string::npos);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedStructListChildOnly) {
+    // Projects only struct child #1 ("xs") and reads it in two chunks of 2 and 3 rows.
+    const auto struct_idx = find_field_idx("nullable_struct_list_col");
+    ASSERT_LT(struct_idx, _fields.size());
+    const auto& struct_field = *_fields[struct_idx];
+    ASSERT_EQ(struct_field.name, "nullable_struct_list_col");
+    ASSERT_EQ(struct_field.children.size(), 2);
+
+    auto reader = create_projected_child_reader(struct_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    ASSERT_TRUE(reader->type()->is_nullable());
+    const auto* pruned = assert_cast<const DataTypeStruct*>(remove_nullable(reader->type()).get());
+    ASSERT_EQ(pruned->get_elements().size(), 1);
+    EXPECT_EQ(pruned->get_element_name(0), "xs");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto first_status = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(first_status.ok()) << first_status;
+    ASSERT_EQ(rows_read, 2);
+    const auto second_status = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(second_status.ok()) << second_status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), ROW_COUNT);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    EXPECT_FALSE(outer.is_null_at(3));
+    EXPECT_FALSE(outer.is_null_at(4));
+    // Both chunks must have landed in the single projected child column.
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    ASSERT_EQ(inner.get_columns().size(), 1);
+    const auto& xs_outer = assert_cast<const ColumnNullable&>(inner.get_column(0));
+    ASSERT_EQ(xs_outer.size(), ROW_COUNT);
+    EXPECT_FALSE(xs_outer.is_null_at(0));
+    EXPECT_FALSE(xs_outer.is_null_at(2));
+    EXPECT_TRUE(xs_outer.is_null_at(3));
+    EXPECT_FALSE(xs_outer.is_null_at(4));
+    const auto& xs_lists = assert_cast<const ColumnArray&>(xs_outer.get_nested_column());
+    const auto& list_ends = xs_lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), ROW_COUNT);
+    EXPECT_EQ(list_ends[0], 2);
+    EXPECT_EQ(list_ends[1], 2);
+    EXPECT_EQ(list_ends[2], 2);
+    EXPECT_EQ(list_ends[3], 2);
+    EXPECT_EQ(list_ends[4], 4);
+    const auto& items = assert_cast<const ColumnNullable&>(xs_lists.get_data());
+    const auto& item_ints = assert_cast<const ColumnInt32&>(items.get_nested_column());
+    ASSERT_EQ(items.size(), 4);
+    EXPECT_EQ(item_ints.get_element(0), 1);
+    EXPECT_EQ(item_ints.get_element(1), 2);
+    EXPECT_TRUE(items.is_null_at(2));
+    EXPECT_EQ(item_ints.get_element(3), 5);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipProjectedStructListChildOnlyThenRead) {
+    // Skips one row of the struct projected down to child #1, then reads the next three.
+    const auto struct_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_projected_child_reader(struct_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    const auto skip_status = reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto read_status = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_TRUE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    ASSERT_EQ(inner.get_columns().size(), 1);
+    const auto& xs_outer = assert_cast<const ColumnNullable&>(inner.get_column(0));
+    ASSERT_EQ(xs_outer.size(), 3);
+    EXPECT_FALSE(xs_outer.is_null_at(1));
+    EXPECT_TRUE(xs_outer.is_null_at(2));
+    const auto& xs_lists = assert_cast<const ColumnArray&>(xs_outer.get_nested_column());
+    const auto& list_ends = xs_lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 3);
+    EXPECT_EQ(list_ends[0], 0);
+    EXPECT_EQ(list_ends[1], 0);
+    EXPECT_EQ(list_ends[2], 0);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectProjectedStructListChildOnly) {
+    // Applies the selection {0, 3, 4} to the struct projected down to child #1.
+    const auto struct_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_projected_child_reader(struct_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    const auto status = reader->select(picked, 3, ROW_COUNT, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    ASSERT_EQ(inner.get_columns().size(), 1);
+    const auto& xs_outer = assert_cast<const ColumnNullable&>(inner.get_column(0));
+    ASSERT_EQ(xs_outer.size(), 3);
+    EXPECT_FALSE(xs_outer.is_null_at(0));
+    EXPECT_TRUE(xs_outer.is_null_at(1));
+    EXPECT_FALSE(xs_outer.is_null_at(2));
+    const auto& xs_lists = assert_cast<const ColumnArray&>(xs_outer.get_nested_column());
+    const auto& list_ends = xs_lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 3);
+    EXPECT_EQ(list_ends[0], 2);
+    EXPECT_EQ(list_ends[1], 2);
+    EXPECT_EQ(list_ends[2], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadProjectedStructMapChildOnly) {
+    // Projects only struct child #1 ("kv") and reads it in two chunks of 2 and 3 rows.
+    const auto struct_idx = find_field_idx("nullable_struct_map_col");
+    ASSERT_LT(struct_idx, _fields.size());
+    const auto& struct_field = *_fields[struct_idx];
+    ASSERT_EQ(struct_field.name, "nullable_struct_map_col");
+    ASSERT_EQ(struct_field.children.size(), 2);
+
+    auto reader = create_projected_child_reader(struct_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    ASSERT_TRUE(reader->type()->is_nullable());
+    const auto* pruned = assert_cast<const DataTypeStruct*>(remove_nullable(reader->type()).get());
+    ASSERT_EQ(pruned->get_elements().size(), 1);
+    EXPECT_EQ(pruned->get_element_name(0), "kv");
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto first_status = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(first_status.ok()) << first_status;
+    ASSERT_EQ(rows_read, 2);
+    const auto second_status = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(second_status.ok()) << second_status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), ROW_COUNT);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    EXPECT_FALSE(outer.is_null_at(3));
+    EXPECT_FALSE(outer.is_null_at(4));
+    // Both chunks must have landed in the single projected child column.
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    ASSERT_EQ(inner.get_columns().size(), 1);
+    const auto& kv_outer = assert_cast<const ColumnNullable&>(inner.get_column(0));
+    ASSERT_EQ(kv_outer.size(), ROW_COUNT);
+    EXPECT_FALSE(kv_outer.is_null_at(0));
+    EXPECT_FALSE(kv_outer.is_null_at(2));
+    EXPECT_TRUE(kv_outer.is_null_at(3));
+    EXPECT_FALSE(kv_outer.is_null_at(4));
+    const auto& kv_maps = assert_cast<const ColumnMap&>(kv_outer.get_nested_column());
+    const auto& entry_ends = kv_maps.get_offsets();
+    ASSERT_EQ(entry_ends.size(), ROW_COUNT);
+    EXPECT_EQ(entry_ends[0], 2);
+    EXPECT_EQ(entry_ends[1], 2);
+    EXPECT_EQ(entry_ends[2], 2);
+    EXPECT_EQ(entry_ends[3], 2);
+    EXPECT_EQ(entry_ends[4], 3);
+    const auto& key_ints = get_nullable_nested_column<ColumnInt32>(kv_maps.get_keys());
+    const auto& value_cells = assert_cast<const ColumnNullable&>(kv_maps.get_values());
+    const auto& value_strings = assert_cast<const ColumnString&>(value_cells.get_nested_column());
+    ASSERT_EQ(key_ints.size(), 3);
+    EXPECT_EQ(key_ints.get_element(0), 1);
+    EXPECT_EQ(key_ints.get_element(1), 2);
+    EXPECT_EQ(key_ints.get_element(2), 5);
+    EXPECT_EQ(value_strings.get_data_at(0).to_string(), "one");
+    EXPECT_TRUE(value_cells.is_null_at(1));
+    EXPECT_EQ(value_strings.get_data_at(2).to_string(), "five");
+}
+
+TEST_F(ParquetColumnReaderTest, NullableStructUsesListChildAsShapeSource) {
+    // With only the list child projected, the struct's own null flags must still be right.
+    const auto struct_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_projected_child_reader(struct_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto status = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), ROW_COUNT);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    EXPECT_FALSE(outer.is_null_at(3));
+    EXPECT_FALSE(outer.is_null_at(4));
+}
+
+TEST_F(ParquetColumnReaderTest, NullableStructUsesMapChildAsShapeSource) {
+    // With only the map child projected, the struct's own null flags must still be right.
+    const auto struct_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_projected_child_reader(struct_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto status = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), ROW_COUNT);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    EXPECT_FALSE(outer.is_null_at(3));
+    EXPECT_FALSE(outer.is_null_at(4));
+}
+
+TEST_F(ParquetColumnReaderTest, NullableStructUsesNestedStructComplexChildAsShapeSource) {
+    // Projects a single grandchild (0, 0) and checks the null flags of both struct levels.
+    const auto struct_idx = find_field_idx("nullable_struct_nested_struct_list_col");
+    auto reader = create_projected_grandchild_reader(struct_idx, 0, 0);
+    ASSERT_NE(reader, nullptr);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto status = reader->read(ROW_COUNT, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, ROW_COUNT);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), ROW_COUNT);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_TRUE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    EXPECT_FALSE(outer.is_null_at(3));
+    EXPECT_FALSE(outer.is_null_at(4));
+
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    const auto& nested_outer = assert_cast<const ColumnNullable&>(inner.get_column(0));
+    EXPECT_FALSE(nested_outer.is_null_at(0));
+    EXPECT_TRUE(nested_outer.is_null_at(2));
+    EXPECT_FALSE(nested_outer.is_null_at(3));
+    EXPECT_FALSE(nested_outer.is_null_at(4));
+}
+
+TEST_F(ParquetColumnReaderTest, SkipProjectedStructMapChildOnlyThenRead) {
+    // Skips one row of the struct projected down to child #1, then reads the next three.
+    const auto struct_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_projected_child_reader(struct_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    const auto skip_status = reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto read_status = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_TRUE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    ASSERT_EQ(inner.get_columns().size(), 1);
+    const auto& kv_outer = assert_cast<const ColumnNullable&>(inner.get_column(0));
+    ASSERT_EQ(kv_outer.size(), 3);
+    EXPECT_FALSE(kv_outer.is_null_at(1));
+    EXPECT_TRUE(kv_outer.is_null_at(2));
+    const auto& kv_maps = assert_cast<const ColumnMap&>(kv_outer.get_nested_column());
+    const auto& entry_ends = kv_maps.get_offsets();
+    ASSERT_EQ(entry_ends.size(), 3);
+    EXPECT_EQ(entry_ends[0], 0);
+    EXPECT_EQ(entry_ends[1], 0);
+    EXPECT_EQ(entry_ends[2], 0);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectProjectedStructMapChildOnly) {
+    // Applies the selection {0, 3, 4} to the struct projected down to child #1.
+    const auto struct_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_projected_child_reader(struct_idx, 1);
+    ASSERT_NE(reader, nullptr);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    const auto status = reader->select(picked, 3, ROW_COUNT, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    ASSERT_EQ(inner.get_columns().size(), 1);
+    const auto& kv_outer = assert_cast<const ColumnNullable&>(inner.get_column(0));
+    ASSERT_EQ(kv_outer.size(), 3);
+    EXPECT_FALSE(kv_outer.is_null_at(0));
+    EXPECT_TRUE(kv_outer.is_null_at(1));
+    EXPECT_FALSE(kv_outer.is_null_at(2));
+    const auto& kv_maps = assert_cast<const ColumnMap&>(kv_outer.get_nested_column());
+    const auto& entry_ends = kv_maps.get_offsets();
+    ASSERT_EQ(entry_ends.size(), 3);
+    EXPECT_EQ(entry_ends[0], 2);
+    EXPECT_EQ(entry_ends[1], 2);
+    EXPECT_EQ(entry_ends[2], 3);
+    const auto& key_ints = get_nullable_nested_column<ColumnInt32>(kv_maps.get_keys());
+    ASSERT_EQ(key_ints.size(), 3);
+    EXPECT_EQ(key_ints.get_element(0), 1);
+    EXPECT_EQ(key_ints.get_element(1), 2);
+    EXPECT_EQ(key_ints.get_element(2), 5);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadListWithOverflowAcrossChunks) {
+    // Guarded lookup, then reads 2 + 3 rows and runs the fixture's per-field expectation.
+    const auto field_idx = find_field_idx("nullable_list_int_col");
+    ASSERT_LT(field_idx, _fields.size());
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipListWithOverflowThenRead) {
+    // Skips the first row of the list column, then reads the following three rows.
+    const auto list_idx = find_field_idx("nullable_list_int_col");
+    auto reader = create_reader(list_idx);
+    const auto skip_status = reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto read_status = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_TRUE(outer.is_null_at(0));
+    const auto& lists = assert_cast<const ColumnArray&>(outer.get_nested_column());
+    const auto& list_ends = lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 3);
+    EXPECT_EQ(list_ends[0], 0);
+    EXPECT_EQ(list_ends[1], 0);
+    EXPECT_EQ(list_ends[2], 2);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectListWithOverflow) {
+    // Applies the selection {0, 3, 4} to the list column and checks nulls and offsets.
+    const auto list_idx = find_field_idx("nullable_list_int_col");
+    auto reader = create_reader(list_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+    MutableColumnPtr column = reader->type()->create_column();
+    const auto status = reader->select(picked, 3, ROW_COUNT, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& lists = assert_cast<const ColumnArray&>(outer.get_nested_column());
+    const auto& list_ends = lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 3);
+    EXPECT_EQ(list_ends[0], 2);
+    EXPECT_EQ(list_ends[1], 4);
+    EXPECT_EQ(list_ends[2], 5);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadStructListWithOverflowAcrossChunks) {
+    // Guarded lookup, then reads 2 + 3 rows and runs the fixture's per-field expectation.
+    const auto field_idx = find_field_idx("nullable_struct_list_col");
+    ASSERT_LT(field_idx, _fields.size());
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipStructListWithOverflowThenRead) {
+    // Skips the first row of the unprojected struct, then reads the following three rows.
+    const auto struct_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_reader(struct_idx);
+    const auto skip_status = reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto read_status = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_TRUE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    const auto& xs_outer = assert_cast<const ColumnNullable&>(inner.get_column(1));
+    ASSERT_EQ(xs_outer.size(), 3);
+    EXPECT_FALSE(xs_outer.is_null_at(1));
+    EXPECT_TRUE(xs_outer.is_null_at(2));
+    const auto& xs_lists = assert_cast<const ColumnArray&>(xs_outer.get_nested_column());
+    const auto& list_ends = xs_lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 3);
+    EXPECT_EQ(list_ends[0], 0);
+    EXPECT_EQ(list_ends[1], 0);
+    EXPECT_EQ(list_ends[2], 0);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectStructListWithOverflow) {
+    // Applies the selection {0, 3, 4} to the unprojected struct and checks both children.
+    const auto struct_idx = find_field_idx("nullable_struct_list_col");
+    auto reader = create_reader(struct_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    const auto status = reader->select(picked, 3, ROW_COUNT, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    const auto& a_ints = get_nullable_nested_column<ColumnInt32>(inner.get_column(0));
+    EXPECT_EQ(a_ints.get_element(0), 301);
+    EXPECT_EQ(a_ints.get_element(1), 304);
+    EXPECT_EQ(a_ints.get_element(2), 305);
+    const auto& xs_outer = assert_cast<const ColumnNullable&>(inner.get_column(1));
+    ASSERT_EQ(xs_outer.size(), 3);
+    EXPECT_FALSE(xs_outer.is_null_at(0));
+    EXPECT_TRUE(xs_outer.is_null_at(1));
+    EXPECT_FALSE(xs_outer.is_null_at(2));
+    const auto& xs_lists = assert_cast<const ColumnArray&>(xs_outer.get_nested_column());
+    const auto& list_ends = xs_lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 3);
+    EXPECT_EQ(list_ends[0], 2);
+    EXPECT_EQ(list_ends[1], 2);
+    EXPECT_EQ(list_ends[2], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadStructMapWithOverflowAcrossChunks) {
+    // Guarded lookup, then reads 2 + 3 rows and runs the fixture's per-field expectation.
+    const auto field_idx = find_field_idx("nullable_struct_map_col");
+    ASSERT_LT(field_idx, _fields.size());
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipStructMapWithOverflowThenRead) {
+    // Skips the first row of the unprojected struct, then reads the following three rows.
+    const auto struct_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_reader(struct_idx);
+    const auto skip_status = reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto read_status = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_TRUE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    const auto& kv_outer = assert_cast<const ColumnNullable&>(inner.get_column(1));
+    ASSERT_EQ(kv_outer.size(), 3);
+    EXPECT_FALSE(kv_outer.is_null_at(1));
+    EXPECT_TRUE(kv_outer.is_null_at(2));
+    const auto& kv_maps = assert_cast<const ColumnMap&>(kv_outer.get_nested_column());
+    const auto& entry_ends = kv_maps.get_offsets();
+    ASSERT_EQ(entry_ends.size(), 3);
+    EXPECT_EQ(entry_ends[0], 0);
+    EXPECT_EQ(entry_ends[1], 0);
+    EXPECT_EQ(entry_ends[2], 0);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectStructMapWithOverflow) {
+    // Applies the selection {0, 3, 4} to the unprojected struct and checks both children.
+    const auto struct_idx = find_field_idx("nullable_struct_map_col");
+    auto reader = create_reader(struct_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+
+    MutableColumnPtr column = reader->type()->create_column();
+    const auto status = reader->select(picked, 3, ROW_COUNT, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_FALSE(outer.is_null_at(0));
+    EXPECT_FALSE(outer.is_null_at(1));
+    EXPECT_FALSE(outer.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnStruct&>(outer.get_nested_column());
+    const auto& a_ints = get_nullable_nested_column<ColumnInt32>(inner.get_column(0));
+    EXPECT_EQ(a_ints.get_element(0), 401);
+    EXPECT_EQ(a_ints.get_element(1), 404);
+    EXPECT_EQ(a_ints.get_element(2), 405);
+    const auto& kv_outer = assert_cast<const ColumnNullable&>(inner.get_column(1));
+    ASSERT_EQ(kv_outer.size(), 3);
+    EXPECT_FALSE(kv_outer.is_null_at(0));
+    EXPECT_TRUE(kv_outer.is_null_at(1));
+    EXPECT_FALSE(kv_outer.is_null_at(2));
+    const auto& kv_maps = assert_cast<const ColumnMap&>(kv_outer.get_nested_column());
+    const auto& entry_ends = kv_maps.get_offsets();
+    ASSERT_EQ(entry_ends.size(), 3);
+    EXPECT_EQ(entry_ends[0], 2);
+    EXPECT_EQ(entry_ends[1], 2);
+    EXPECT_EQ(entry_ends[2], 3);
+    const auto& key_ints = get_nullable_nested_column<ColumnInt32>(kv_maps.get_keys());
+    const auto& value_cells = assert_cast<const ColumnNullable&>(kv_maps.get_values());
+    const auto& value_strings = assert_cast<const ColumnString&>(value_cells.get_nested_column());
+    ASSERT_EQ(key_ints.size(), 3);
+    EXPECT_EQ(key_ints.get_element(0), 1);
+    EXPECT_EQ(key_ints.get_element(1), 2);
+    EXPECT_EQ(key_ints.get_element(2), 5);
+    EXPECT_EQ(value_strings.get_data_at(0).to_string(), "one");
+    EXPECT_TRUE(value_cells.is_null_at(1));
+    EXPECT_EQ(value_strings.get_data_at(2).to_string(), "five");
+}
+
+TEST_F(ParquetColumnReaderTest, ReadListStructWithOverflowAcrossChunks) {
+    // Guarded lookup, then reads 2 + 3 rows and runs the fixture's per-field expectation.
+    const auto field_idx = find_field_idx("nullable_list_struct_col");
+    ASSERT_LT(field_idx, _fields.size());
+    auto reader = create_reader(field_idx);
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    auto st = reader->read(2, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 2);
+    st = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(st.ok()) << st;
+    ASSERT_EQ(rows_read, 3);
+    _expected_by_field[field_idx](*_fields[field_idx], *column);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipListStructWithOverflowThenRead) {
+    // Skips the first row of the list-of-struct column, then reads the following three rows.
+    const auto list_idx = find_field_idx("nullable_list_struct_col");
+    auto reader = create_reader(list_idx);
+    const auto skip_status = reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+    MutableColumnPtr column = reader->type()->create_column();
+    int64_t rows_read = 0;
+    const auto read_status = reader->read(3, column, &rows_read);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(rows_read, 3);
+
+    const auto& outer = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer.size(), 3);
+    EXPECT_TRUE(outer.is_null_at(0));
+    const auto& lists = assert_cast<const ColumnArray&>(outer.get_nested_column());
+    const auto& list_ends = lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 3);
+    EXPECT_EQ(list_ends[0], 0);
+    EXPECT_EQ(list_ends[1], 0);
+    EXPECT_EQ(list_ends[2], 2);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectListStructWithOverflow) {
+    // select() with entries 0, 3 and 4, then verify the null flags and array offsets.
+    const auto col_idx = find_field_idx("nullable_list_struct_col");
+    auto col_reader = create_reader(col_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    const auto status = col_reader->select(picked, 3, ROW_COUNT, dst);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_FALSE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    const auto& lists = assert_cast<const ColumnArray&>(top.get_nested_column());
+    const auto& ends = lists.get_offsets();
+    ASSERT_EQ(ends.size(), 3);
+    EXPECT_EQ(ends[0], 2);
+    EXPECT_EQ(ends[1], 4);
+    EXPECT_EQ(ends[2], 5);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadListListWithOverflowAcrossChunks) {
+    // Read 2 then 3 rows into one column and validate it with the field's expectation callback.
+    const auto col_idx = find_field_idx("nullable_list_list_int_col");
+    auto col_reader = create_reader(col_idx);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+
+    auto status = col_reader->read(2, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 2);
+    status = col_reader->read(3, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 3);
+    _expected_by_field[col_idx](*_fields[col_idx], *dst);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipListListWithOverflowThenRead) {
+    // Skip 1 row, read 3, then check the outer and inner array layers of the result.
+    const auto col_idx = find_field_idx("nullable_list_list_int_col");
+    auto col_reader = create_reader(col_idx);
+    const auto skip_status = col_reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+    const auto read_status = col_reader->read(3, dst, &got);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(got, 3);
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_TRUE(top.is_null_at(0));
+    const auto& outer = assert_cast<const ColumnArray&>(top.get_nested_column());
+    const auto& outer_ends = outer.get_offsets();
+    ASSERT_EQ(outer_ends.size(), 3);
+    EXPECT_EQ(outer_ends[0], 0);
+    EXPECT_EQ(outer_ends[1], 0);
+    EXPECT_EQ(outer_ends[2], 1);
+
+    const auto& elems = assert_cast<const ColumnNullable&>(outer.get_data());
+    ASSERT_EQ(elems.size(), 1);
+    EXPECT_FALSE(elems.is_null_at(0));
+    const auto& inner = assert_cast<const ColumnArray&>(elems.get_nested_column());
+    const auto& inner_ends = inner.get_offsets();
+    ASSERT_EQ(inner_ends.size(), 1);
+    EXPECT_EQ(inner_ends[0], 1);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectListListWithOverflow) {
+    // select() with entries 0, 3 and 4, then check the outer and inner array layers.
+    const auto col_idx = find_field_idx("nullable_list_list_int_col");
+    auto col_reader = create_reader(col_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    const auto status = col_reader->select(picked, 3, ROW_COUNT, dst);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_FALSE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    const auto& outer = assert_cast<const ColumnArray&>(top.get_nested_column());
+    const auto& outer_ends = outer.get_offsets();
+    ASSERT_EQ(outer_ends.size(), 3);
+    EXPECT_EQ(outer_ends[0], 4);
+    EXPECT_EQ(outer_ends[1], 5);
+    EXPECT_EQ(outer_ends[2], 7);
+
+    const auto& elems = assert_cast<const ColumnNullable&>(outer.get_data());
+    ASSERT_EQ(elems.size(), 7);
+    EXPECT_TRUE(elems.is_null_at(2));
+    const auto& inner = assert_cast<const ColumnArray&>(elems.get_nested_column());
+    const auto& inner_ends = inner.get_offsets();
+    ASSERT_EQ(inner_ends.size(), 7);
+    EXPECT_EQ(inner_ends[0], 2);
+    EXPECT_EQ(inner_ends[3], 4);
+    EXPECT_EQ(inner_ends[4], 5);
+    EXPECT_EQ(inner_ends[6], 7);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadMapWithOverflowAcrossChunks) {
+    // Read 2 then 3 rows into one column and validate it with the field's expectation callback.
+    const auto col_idx = find_field_idx("nullable_map_int_string_col");
+    auto col_reader = create_reader(col_idx);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+
+    auto status = col_reader->read(2, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 2);
+    status = col_reader->read(3, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 3);
+    _expected_by_field[col_idx](*_fields[col_idx], *dst);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipMapWithOverflowThenRead) {
+    // Skip 1 row, read the next 3, and verify the null flags and map offsets of the result.
+    const auto col_idx = find_field_idx("nullable_map_int_string_col");
+    auto col_reader = create_reader(col_idx);
+    const auto skip_status = col_reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+    const auto read_status = col_reader->read(3, dst, &got);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(got, 3);
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_TRUE(top.is_null_at(0));
+    const auto& maps = assert_cast<const ColumnMap&>(top.get_nested_column());
+    const auto& ends = maps.get_offsets();
+    ASSERT_EQ(ends.size(), 3);
+    EXPECT_EQ(ends[0], 0);
+    EXPECT_EQ(ends[1], 0);
+    EXPECT_EQ(ends[2], 1);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectMapWithOverflow) {
+    // select() with entries 0, 3 and 4, then verify the null flags and map offsets.
+    const auto col_idx = find_field_idx("nullable_map_int_string_col");
+    auto col_reader = create_reader(col_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    const auto status = col_reader->select(picked, 3, ROW_COUNT, dst);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_FALSE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    const auto& maps = assert_cast<const ColumnMap&>(top.get_nested_column());
+    const auto& ends = maps.get_offsets();
+    ASSERT_EQ(ends.size(), 3);
+    EXPECT_EQ(ends[0], 2);
+    EXPECT_EQ(ends[1], 3);
+    EXPECT_EQ(ends[2], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadMapStructWithOverflowAcrossChunks) {
+    // Read 2 then 3 rows into one column and validate it with the field's expectation callback.
+    const auto col_idx = find_field_idx("nullable_map_int_struct_col");
+    auto col_reader = create_reader(col_idx);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+
+    auto status = col_reader->read(2, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 2);
+    status = col_reader->read(3, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 3);
+    _expected_by_field[col_idx](*_fields[col_idx], *dst);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipMapStructWithOverflowThenRead) {
+    // Skip 1 row, read the next 3, and verify the null flags and map offsets of the result.
+    const auto col_idx = find_field_idx("nullable_map_int_struct_col");
+    auto col_reader = create_reader(col_idx);
+    const auto skip_status = col_reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+    const auto read_status = col_reader->read(3, dst, &got);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(got, 3);
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_TRUE(top.is_null_at(0));
+    const auto& maps = assert_cast<const ColumnMap&>(top.get_nested_column());
+    const auto& ends = maps.get_offsets();
+    ASSERT_EQ(ends.size(), 3);
+    EXPECT_EQ(ends[0], 0);
+    EXPECT_EQ(ends[1], 0);
+    EXPECT_EQ(ends[2], 1);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectMapStructWithOverflow) {
+    // select() with entries 0, 3 and 4, then verify the null flags and map offsets.
+    const auto col_idx = find_field_idx("nullable_map_int_struct_col");
+    auto col_reader = create_reader(col_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    const auto status = col_reader->select(picked, 3, ROW_COUNT, dst);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_FALSE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    const auto& maps = assert_cast<const ColumnMap&>(top.get_nested_column());
+    const auto& ends = maps.get_offsets();
+    ASSERT_EQ(ends.size(), 3);
+    EXPECT_EQ(ends[0], 2);
+    EXPECT_EQ(ends[1], 3);
+    EXPECT_EQ(ends[2], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadMapListWithOverflowAcrossChunks) {
+    // Read 2 then 3 rows into one column and validate it with the field's expectation callback.
+    const auto col_idx = find_field_idx("nullable_map_int_list_col");
+    auto col_reader = create_reader(col_idx);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+
+    auto status = col_reader->read(2, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 2);
+    status = col_reader->read(3, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 3);
+    _expected_by_field[col_idx](*_fields[col_idx], *dst);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipMapListWithOverflowThenRead) {
+    // Skip 1 row, read 3, then check the map layer and the nullable list values beneath it.
+    const auto col_idx = find_field_idx("nullable_map_int_list_col");
+    auto col_reader = create_reader(col_idx);
+    const auto skip_status = col_reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+    const auto read_status = col_reader->read(3, dst, &got);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(got, 3);
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_TRUE(top.is_null_at(0));
+    const auto& maps = assert_cast<const ColumnMap&>(top.get_nested_column());
+    const auto& map_ends = maps.get_offsets();
+    ASSERT_EQ(map_ends.size(), 3);
+    EXPECT_EQ(map_ends[0], 0);
+    EXPECT_EQ(map_ends[1], 0);
+    EXPECT_EQ(map_ends[2], 2);
+
+    const auto& lists_nullable = assert_cast<const ColumnNullable&>(maps.get_values());
+    ASSERT_EQ(lists_nullable.size(), 2);
+    EXPECT_TRUE(lists_nullable.is_null_at(0));
+    EXPECT_FALSE(lists_nullable.is_null_at(1));
+    const auto& lists = assert_cast<const ColumnArray&>(lists_nullable.get_nested_column());
+    const auto& list_ends = lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 2);
+    EXPECT_EQ(list_ends[0], 0);
+    EXPECT_EQ(list_ends[1], 2);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectMapListWithOverflow) {
+    // select() with entries 0, 3 and 4, then check the map layer and its nullable list values.
+    const auto col_idx = find_field_idx("nullable_map_int_list_col");
+    auto col_reader = create_reader(col_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    const auto status = col_reader->select(picked, 3, ROW_COUNT, dst);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_FALSE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    const auto& maps = assert_cast<const ColumnMap&>(top.get_nested_column());
+    const auto& map_ends = maps.get_offsets();
+    ASSERT_EQ(map_ends.size(), 3);
+    EXPECT_EQ(map_ends[0], 2);
+    EXPECT_EQ(map_ends[1], 4);
+    EXPECT_EQ(map_ends[2], 5);
+
+    const auto& lists_nullable = assert_cast<const ColumnNullable&>(maps.get_values());
+    ASSERT_EQ(lists_nullable.size(), 5);
+    EXPECT_FALSE(lists_nullable.is_null_at(0));
+    EXPECT_TRUE(lists_nullable.is_null_at(2));
+    EXPECT_FALSE(lists_nullable.is_null_at(4));
+    const auto& lists = assert_cast<const ColumnArray&>(lists_nullable.get_nested_column());
+    const auto& list_ends = lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 5);
+    EXPECT_EQ(list_ends[0], 2);
+    EXPECT_EQ(list_ends[1], 2);
+    EXPECT_EQ(list_ends[2], 2);
+    EXPECT_EQ(list_ends[3], 4);
+    EXPECT_EQ(list_ends[4], 5);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadDeepListStructMapListAcrossChunks) {
+    // Read 1, 2 and 2 rows into one column and validate it with the field's expectation callback.
+    const auto col_idx = find_field_idx("nullable_list_struct_map_list_col");
+    auto col_reader = create_reader(col_idx);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+
+    auto status = col_reader->read(1, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 1);
+    status = col_reader->read(2, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 2);
+    status = col_reader->read(2, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 2);
+    _expected_by_field[col_idx](*_fields[col_idx], *dst);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipDeepListStructMapListThenRead) {
+    // Skip 1 row, read 4, then walk the array -> struct -> map -> list layers of the result.
+    const auto col_idx = find_field_idx("nullable_list_struct_map_list_col");
+    auto col_reader = create_reader(col_idx);
+    const auto skip_status = col_reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+    const auto read_status = col_reader->read(4, dst, &got);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(got, 4);
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 4);
+    EXPECT_TRUE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    EXPECT_FALSE(top.is_null_at(3));
+
+    const auto& outer = assert_cast<const ColumnArray&>(top.get_nested_column());
+    const auto& outer_ends = outer.get_offsets();
+    ASSERT_EQ(outer_ends.size(), 4);
+    EXPECT_EQ(outer_ends[0], 0);
+    EXPECT_EQ(outer_ends[1], 0);
+    EXPECT_EQ(outer_ends[2], 2);
+    EXPECT_EQ(outer_ends[3], 3);
+
+    const auto& structs_nullable = assert_cast<const ColumnNullable&>(outer.get_data());
+    ASSERT_EQ(structs_nullable.size(), 3);
+    EXPECT_FALSE(structs_nullable.is_null_at(0));
+    EXPECT_FALSE(structs_nullable.is_null_at(1));
+    EXPECT_FALSE(structs_nullable.is_null_at(2));
+    const auto& structs = assert_cast<const ColumnStruct&>(structs_nullable.get_nested_column());
+    const auto& maps_nullable = assert_cast<const ColumnNullable&>(structs.get_column(0));
+    ASSERT_EQ(maps_nullable.size(), 3);
+    EXPECT_TRUE(maps_nullable.is_null_at(0));
+    EXPECT_FALSE(maps_nullable.is_null_at(1));
+    EXPECT_FALSE(maps_nullable.is_null_at(2));
+
+    const auto& maps = assert_cast<const ColumnMap&>(maps_nullable.get_nested_column());
+    const auto& map_ends = maps.get_offsets();
+    ASSERT_EQ(map_ends.size(), 3);
+    EXPECT_EQ(map_ends[0], 0);
+    EXPECT_EQ(map_ends[1], 0);
+    EXPECT_EQ(map_ends[2], 2);
+    const auto& map_keys = get_nullable_nested_column<ColumnInt32>(maps.get_keys());
+    ASSERT_EQ(map_keys.size(), 2);
+    EXPECT_EQ(map_keys.get_element(0), 3);
+    EXPECT_EQ(map_keys.get_element(1), 4);
+    const auto& lists_nullable = assert_cast<const ColumnNullable&>(maps.get_values());
+    ASSERT_EQ(lists_nullable.size(), 2);
+    EXPECT_TRUE(lists_nullable.is_null_at(0));
+    EXPECT_FALSE(lists_nullable.is_null_at(1));
+    const auto& lists = assert_cast<const ColumnArray&>(lists_nullable.get_nested_column());
+    const auto& list_ends = lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 2);
+    EXPECT_EQ(list_ends[0], 0);
+    EXPECT_EQ(list_ends[1], 1);
+}
+
+TEST_F(ParquetColumnReaderTest, SelectDeepListStructMapList) {
+    // select() with entries 0, 3 and 4, then walk the array -> struct -> map layers.
+    const auto col_idx = find_field_idx("nullable_list_struct_map_list_col");
+    auto col_reader = create_reader(col_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    const auto status = col_reader->select(picked, 3, ROW_COUNT, dst);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_FALSE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    const auto& outer = assert_cast<const ColumnArray&>(top.get_nested_column());
+    const auto& outer_ends = outer.get_offsets();
+    ASSERT_EQ(outer_ends.size(), 3);
+    EXPECT_EQ(outer_ends[0], 2);
+    EXPECT_EQ(outer_ends[1], 4);
+    EXPECT_EQ(outer_ends[2], 5);
+
+    const auto& structs_nullable = assert_cast<const ColumnNullable&>(outer.get_data());
+    ASSERT_EQ(structs_nullable.size(), 5);
+    EXPECT_FALSE(structs_nullable.is_null_at(0));
+    EXPECT_TRUE(structs_nullable.is_null_at(1));
+    EXPECT_FALSE(structs_nullable.is_null_at(2));
+    EXPECT_FALSE(structs_nullable.is_null_at(3));
+    EXPECT_FALSE(structs_nullable.is_null_at(4));
+    const auto& structs = assert_cast<const ColumnStruct&>(structs_nullable.get_nested_column());
+    const auto& maps_nullable = assert_cast<const ColumnNullable&>(structs.get_column(0));
+    ASSERT_EQ(maps_nullable.size(), 5);
+    EXPECT_FALSE(maps_nullable.is_null_at(0));
+    EXPECT_TRUE(maps_nullable.is_null_at(1));
+    EXPECT_TRUE(maps_nullable.is_null_at(2));
+    EXPECT_FALSE(maps_nullable.is_null_at(3));
+    EXPECT_FALSE(maps_nullable.is_null_at(4));
+    const auto& maps = assert_cast<const ColumnMap&>(maps_nullable.get_nested_column());
+    const auto& map_ends = maps.get_offsets();
+    ASSERT_EQ(map_ends.size(), 5);
+    EXPECT_EQ(map_ends[0], 2);
+    EXPECT_EQ(map_ends[1], 2);
+    EXPECT_EQ(map_ends[2], 2);
+    EXPECT_EQ(map_ends[3], 2);
+    EXPECT_EQ(map_ends[4], 4);
+}
+
+TEST_F(ParquetColumnReaderTest, ReadDeepMapListMapAcrossChunks) {
+    // Read 1, 2 and 2 rows into one column and validate it with the field's expectation callback.
+    const auto col_idx = find_field_idx("nullable_map_int_list_map_int_string_col");
+    auto col_reader = create_reader(col_idx);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+
+    auto status = col_reader->read(1, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 1);
+    status = col_reader->read(2, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 2);
+    status = col_reader->read(2, dst, &got);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(got, 2);
+    _expected_by_field[col_idx](*_fields[col_idx], *dst);
+}
+
+TEST_F(ParquetColumnReaderTest, SkipDeepMapListMapThenRead) {
+    // Skip 1 row, read 4, then walk the map -> list -> map layers of the result.
+    const auto col_idx = find_field_idx("nullable_map_int_list_map_int_string_col");
+    auto col_reader = create_reader(col_idx);
+    const auto skip_status = col_reader->skip(1);
+    ASSERT_TRUE(skip_status.ok()) << skip_status;
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    int64_t got = 0;
+    const auto read_status = col_reader->read(4, dst, &got);
+    ASSERT_TRUE(read_status.ok()) << read_status;
+    ASSERT_EQ(got, 4);
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 4);
+    EXPECT_TRUE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    EXPECT_FALSE(top.is_null_at(3));
+    const auto& outer = assert_cast<const ColumnMap&>(top.get_nested_column());
+    const auto& outer_ends = outer.get_offsets();
+    ASSERT_EQ(outer_ends.size(), 4);
+    EXPECT_EQ(outer_ends[0], 0);
+    EXPECT_EQ(outer_ends[1], 0);
+    EXPECT_EQ(outer_ends[2], 2);
+    EXPECT_EQ(outer_ends[3], 3);
+    const auto& top_keys = get_nullable_nested_column<ColumnInt32>(outer.get_keys());
+    ASSERT_EQ(top_keys.size(), 3);
+    EXPECT_EQ(top_keys.get_element(0), 30);
+    EXPECT_EQ(top_keys.get_element(1), 40);
+    EXPECT_EQ(top_keys.get_element(2), 50);
+
+    const auto& lists_nullable = assert_cast<const ColumnNullable&>(outer.get_values());
+    ASSERT_EQ(lists_nullable.size(), 3);
+    EXPECT_TRUE(lists_nullable.is_null_at(0));
+    EXPECT_FALSE(lists_nullable.is_null_at(1));
+    EXPECT_FALSE(lists_nullable.is_null_at(2));
+    const auto& lists = assert_cast<const ColumnArray&>(lists_nullable.get_nested_column());
+    const auto& list_ends = lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 3);
+    EXPECT_EQ(list_ends[0], 0);
+    EXPECT_EQ(list_ends[1], 1);
+    EXPECT_EQ(list_ends[2], 3);
+    const auto& nested_maps = assert_cast<const ColumnNullable&>(lists.get_data());
+    ASSERT_EQ(nested_maps.size(), 3);
+    EXPECT_FALSE(nested_maps.is_null_at(0));
+    EXPECT_TRUE(nested_maps.is_null_at(1));
+    EXPECT_FALSE(nested_maps.is_null_at(2));
+}
+
+TEST_F(ParquetColumnReaderTest, SelectDeepMapListMap) {
+    // select() with entries 0, 3 and 4, then check the outer map and its nullable list values.
+    const auto col_idx = find_field_idx("nullable_map_int_list_map_int_string_col");
+    auto col_reader = create_reader(col_idx);
+    SelectionVector picked(3);
+    picked.set_index(0, 0);
+    picked.set_index(1, 3);
+    picked.set_index(2, 4);
+    MutableColumnPtr dst = col_reader->type()->create_column();
+    const auto status = col_reader->select(picked, 3, ROW_COUNT, dst);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& top = assert_cast<const ColumnNullable&>(*dst);
+    ASSERT_EQ(top.size(), 3);
+    EXPECT_FALSE(top.is_null_at(0));
+    EXPECT_FALSE(top.is_null_at(1));
+    EXPECT_FALSE(top.is_null_at(2));
+    const auto& outer = assert_cast<const ColumnMap&>(top.get_nested_column());
+    const auto& outer_ends = outer.get_offsets();
+    ASSERT_EQ(outer_ends.size(), 3);
+    EXPECT_EQ(outer_ends[0], 2);
+    EXPECT_EQ(outer_ends[1], 4);
+    EXPECT_EQ(outer_ends[2], 5);
+    const auto& top_keys = get_nullable_nested_column<ColumnInt32>(outer.get_keys());
+    ASSERT_EQ(top_keys.size(), 5);
+    EXPECT_EQ(top_keys.get_element(0), 10);
+    EXPECT_EQ(top_keys.get_element(1), 20);
+    EXPECT_EQ(top_keys.get_element(2), 30);
+    EXPECT_EQ(top_keys.get_element(3), 40);
+    EXPECT_EQ(top_keys.get_element(4), 50);
+
+    const auto& lists_nullable = assert_cast<const ColumnNullable&>(outer.get_values());
+    ASSERT_EQ(lists_nullable.size(), 5);
+    EXPECT_FALSE(lists_nullable.is_null_at(0));
+    EXPECT_FALSE(lists_nullable.is_null_at(1));
+    EXPECT_TRUE(lists_nullable.is_null_at(2));
+    EXPECT_FALSE(lists_nullable.is_null_at(3));
+    EXPECT_FALSE(lists_nullable.is_null_at(4));
+    const auto& lists = assert_cast<const ColumnArray&>(lists_nullable.get_nested_column());
+    const auto& list_ends = lists.get_offsets();
+    ASSERT_EQ(list_ends.size(), 5);
+    EXPECT_EQ(list_ends[0], 3);
+    EXPECT_EQ(list_ends[1], 3);
+    EXPECT_EQ(list_ends[2], 3);
+    EXPECT_EQ(list_ends[3], 4);
+    EXPECT_EQ(list_ends[4], 6);
+}
+
+} // namespace
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp b/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp
new file mode 100644
index 00000000000000..0d0f9a2f8567cc
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_leaf_reader_test.cpp
@@ -0,0 +1,506 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/reader/parquet_leaf_reader.h"
+
+#include <arrow/array/builder_binary.h>
+#include <cctz/time_zone.h>
+#include <gtest/gtest.h>
+#include <parquet/api/schema.h>
+
+#include <cmath>
+#include <cstring>
+#include <functional>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+
+namespace doris::format::parquet {
+namespace {
+
+std::shared_ptr<arrow::Array> fixed_binary_array(const std::vector<std::string>& values,
+                                                 int byte_width) {
+    const auto binary_type = arrow::fixed_size_binary(byte_width);
+    arrow::FixedSizeBinaryBuilder builder(binary_type, arrow::default_memory_pool());
+    for (const auto& bytes : values) { // presumably each entry holds exactly byte_width bytes
+        EXPECT_TRUE(builder.Append(reinterpret_cast<const uint8_t*>(bytes.data())).ok());
+    }
+    std::shared_ptr<arrow::Array> result;
+    EXPECT_TRUE(builder.Finish(&result).ok());
+    return result;
+}
+
+ParquetLeafReader make_leaf_reader(ParquetTypeDescriptor descriptor, DataTypePtr type) {
+    return ParquetLeafReader(nullptr, descriptor, std::move(type), "leaf", nullptr);
+} // Builds a reader named "leaf" with no column descriptor (first argument is nullptr).
+
+struct CapturedDecodedView { // Snapshot of one DecodedColumnView, filled by the spy appender.
+    DecodedValueKind value_kind = DecodedValueKind::INT32;
+    DecodedTimeUnit time_unit = DecodedTimeUnit::UNKNOWN;
+    int64_t row_count = 0;
+    int decimal_precision = -1;
+    int decimal_scale = -1;
+    int fixed_length = -1;
+    bool timestamp_is_adjusted_to_utc = false;
+    bool enable_strict_mode = false;
+    const cctz::time_zone* timezone = nullptr;
+    bool null_map_is_null = true;                 // true if the view had no null map
+    std::vector<uint8_t> null_map;                // copy of view.null_map[0, row_count)
+    std::vector<uint8_t> fixed_values;            // raw value bytes (INT64/FLOAT/INT32 only)
+    std::vector<StringRef> binary_values;         // refs into owned_binary_values
+    std::vector<std::string> owned_binary_values; // owned copies of view.binary_values
+};
+
+ParquetLeafReader make_spy_leaf_reader(ParquetTypeDescriptor descriptor, DataTypePtr type,
+                                       CapturedDecodedView* captured,
+                                       const cctz::time_zone* timezone = nullptr,
+                                       bool enable_strict_mode = false) {
+    // The appender ignores the destination column; it only snapshots the view into *captured.
+    auto spy = [captured](MutableColumnPtr&, const DecodedColumnView& view) {
+        auto& out = *captured;
+        out.value_kind = view.value_kind;
+        out.time_unit = view.time_unit;
+        out.row_count = view.row_count;
+        out.decimal_precision = view.decimal_precision;
+        out.decimal_scale = view.decimal_scale;
+        out.fixed_length = view.fixed_length;
+        out.timestamp_is_adjusted_to_utc = view.timestamp_is_adjusted_to_utc;
+        out.enable_strict_mode = view.enable_strict_mode;
+        out.timezone = view.timezone;
+        out.null_map_is_null = view.null_map == nullptr;
+        out.null_map.clear();
+        if (!out.null_map_is_null) {
+            out.null_map.assign(view.null_map, view.null_map + view.row_count);
+        }
+        const bool four_byte = view.value_kind == DecodedValueKind::FLOAT ||
+                               view.value_kind == DecodedValueKind::INT32;
+        const int64_t width = view.value_kind == DecodedValueKind::INT64 ? 8 : (four_byte ? 4 : 0);
+        out.fixed_values.clear();
+        if (view.values != nullptr && width != 0) {
+            out.fixed_values.assign(view.values, view.values + view.row_count * width);
+        }
+        out.binary_values.clear();
+        out.owned_binary_values.clear();
+        if (view.binary_values != nullptr) {
+            out.owned_binary_values.reserve(view.binary_values->size());
+            for (const auto& ref : *view.binary_values) {
+                out.owned_binary_values.emplace_back(
+                        ref.data == nullptr ? std::string() : std::string(ref.data, ref.size));
+            }
+            out.binary_values.reserve(out.owned_binary_values.size());
+            for (const auto& owned : out.owned_binary_values) {
+                out.binary_values.emplace_back(owned.data(), owned.size());
+            }
+        }
+        return Status::OK();
+    };
+    return ParquetLeafReader(nullptr, descriptor, std::move(type), "leaf", nullptr, {}, timezone,
+                             enable_strict_mode, std::move(spy));
+}
+
+} // namespace
+
+struct ParquetLeafReaderTestAccess { // Test-side helpers around leaf batch fields and reader calls.
+    static ParquetLeafBatch make_fixed_batch(const std::vector<int16_t>& def_levels,
+                                             const std::vector<int16_t>& rep_levels,
+                                             const std::vector<int32_t>& values,
+                                             bool read_dense_for_nullable = false) {
+        ParquetLeafBatch leaf; // only borrows the vectors' storage; they must outlive the batch
+        leaf._def_levels = def_levels.data();
+        leaf._rep_levels = rep_levels.data();
+        leaf._fixed_values = reinterpret_cast<const uint8_t*>(values.data());
+        leaf._value_kind = DecodedValueKind::INT32;
+        leaf._read_dense_for_nullable = read_dense_for_nullable;
+        leaf._values_written = static_cast<int64_t>(values.size());
+        leaf._decoded_level_count = static_cast<int64_t>(def_levels.size());
+        leaf._consumed_level_count = static_cast<int64_t>(def_levels.size());
+        return leaf;
+    }
+    // Forwards to build_nested_batch_from_leaf_batch(); the repetition level goes last there.
+    static Status build_nested_batch(const ParquetLeafReader& reader,
+                                     const ParquetLeafBatch& leaf_batch, int64_t records_read,
+                                     int16_t value_slot_definition_level,
+                                     int16_t value_slot_repetition_level,
+                                     ParquetNestedScalarBatch* nested_batch) {
+        return reader.build_nested_batch_from_leaf_batch(leaf_batch, records_read,
+                                                         value_slot_definition_level, nested_batch,
+                                                         value_slot_repetition_level);
+    }
+};
+
+static std::shared_ptr<::parquet::ColumnDescriptor> int32_column_descriptor(
+        int16_t max_definition_level, int16_t max_repetition_level) { // file-local test helper
+    auto node = ::parquet::schema::PrimitiveNode::Make("leaf", ::parquet::Repetition::OPTIONAL,
+                                                       ::parquet::Type::INT32);
+    return std::make_shared<::parquet::ColumnDescriptor>(node, max_definition_level,
+                                                         max_repetition_level);
+}
+
+ParquetLeafReader make_nested_leaf_reader(
+        const std::shared_ptr<::parquet::ColumnDescriptor>& descriptor, DataTypePtr type) {
+    // Only a raw descriptor pointer is handed to the reader; callers keep `descriptor` alive.
+    ParquetTypeDescriptor leaf_type;
+    leaf_type.doris_type = type;
+    leaf_type.physical_type = ::parquet::Type::INT32;
+    return ParquetLeafReader(descriptor.get(), leaf_type, std::move(type), "nested_leaf", nullptr);
+}
+
+TEST(ParquetLeafReaderTest, DenseNullableFixedValuesAreSpacedBeforeSerde) {
+    ParquetTypeDescriptor int32_descriptor;
+    int32_descriptor.physical_type = ::parquet::Type::INT32;
+    auto nullable_type = make_nullable(std::make_shared<DataTypeInt32>());
+    auto reader = make_leaf_reader(int32_descriptor, nullable_type);
+    // Dense batch: only the three non-null values are stored, without slots for null rows.
+    const std::vector<int32_t> dense_values = {10, 30, 50};
+    ParquetLeafBatch dense_batch;
+    dense_batch._read_dense_for_nullable = true;
+    dense_batch._value_kind = DecodedValueKind::INT32;
+    dense_batch._values_written = dense_values.size();
+    dense_batch._fixed_values = reinterpret_cast<const uint8_t*>(dense_values.data());
+
+    const NullMap null_map = {0, 1, 0, 1, 0};
+    auto column = nullable_type->create_column();
+    auto status = reader.append_values(dense_batch, 5, &null_map, column);
+    ASSERT_TRUE(status.ok()) << status;
+    // Nulls are expected at rows 1 and 3; the compact values fill rows 0, 2 and 4 in order.
+    const auto& result = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(result.size(), 5);
+    EXPECT_FALSE(result.is_null_at(0));
+    EXPECT_TRUE(result.is_null_at(1));
+    EXPECT_FALSE(result.is_null_at(2));
+    EXPECT_TRUE(result.is_null_at(3));
+    EXPECT_FALSE(result.is_null_at(4));
+    const auto& data = assert_cast<const ColumnInt32&>(result.get_nested_column());
+    EXPECT_EQ(data.get_element(0), 10);
+    EXPECT_EQ(data.get_element(2), 30);
+    EXPECT_EQ(data.get_element(4), 50);
+}
+
+TEST(ParquetLeafReaderTest, DenseNullableFixedValuesRejectCountMismatch) {
+    ParquetTypeDescriptor int32_descriptor;
+    int32_descriptor.physical_type = ::parquet::Type::INT32;
+    auto nullable_type = make_nullable(std::make_shared<DataTypeInt32>());
+    auto reader = make_leaf_reader(int32_descriptor, nullable_type);
+    // Two dense values cannot satisfy the three non-null rows of the null map below.
+    const std::vector<int32_t> dense_values = {10, 30};
+    ParquetLeafBatch dense_batch;
+    dense_batch._read_dense_for_nullable = true;
+    dense_batch._value_kind = DecodedValueKind::INT32;
+    dense_batch._values_written = dense_values.size();
+    dense_batch._fixed_values = reinterpret_cast<const uint8_t*>(dense_values.data());
+
+    const NullMap null_map = {0, 1, 0, 1, 0};
+    auto column = nullable_type->create_column();
+    auto status = reader.append_values(dense_batch, 5, &null_map, column);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid dense nullable parquet values"), std::string::npos);
+}
+
+TEST(ParquetLeafReaderTest, Float16BinaryValuesAreConvertedToFloat) {
+    ParquetTypeDescriptor half_descriptor;
+    half_descriptor.fixed_length = 2;
+    half_descriptor.extra_type_info = ParquetExtraTypeInfo::FLOAT16;
+    half_descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY;
+    auto float_type = std::make_shared<DataTypeFloat32>();
+    auto reader = make_leaf_reader(half_descriptor, float_type);
+    // Copies the 16 raw bits, in host byte order, into a 2-byte string.
+    auto fp16 = [](uint16_t bits) {
+        std::string raw(sizeof(bits), '\0');
+        memcpy(raw.data(), &bits, sizeof(bits));
+        return raw;
+    };
+    // Bit patterns: +0, -0, 1.5, the smallest subnormal (2^-24) and a NaN.
+    ParquetLeafBatch half_batch;
+    half_batch._value_kind = DecodedValueKind::FIXED_BINARY;
+    half_batch._binary_chunks = {fixed_binary_array(
+            {fp16(0x0000), fp16(0x8000), fp16(0x3E00), fp16(0x0001), fp16(0x7E00)}, 2)};
+    half_batch._values_written = 5;
+
+    auto column = float_type->create_column();
+    auto status = reader.append_values(half_batch, 5, nullptr, column);
+    ASSERT_TRUE(status.ok()) << status;
+
+    const auto& converted = assert_cast<const ColumnFloat32&>(*column);
+    ASSERT_EQ(converted.size(), 5);
+    EXPECT_FLOAT_EQ(converted.get_element(0), 0.0F);
+    EXPECT_TRUE(std::signbit(converted.get_element(1)));
+    EXPECT_FLOAT_EQ(converted.get_element(2), 1.5F);
+    EXPECT_NEAR(converted.get_element(3), 5.9604645e-8F, 1e-12F);
+    EXPECT_TRUE(std::isnan(converted.get_element(4)));
+}
+
+TEST(ParquetLeafReaderTest, BinaryDenseNullableValuesAreSpacedWithNullRefs) {
+    ParquetTypeDescriptor binary_descriptor;
+    binary_descriptor.physical_type = ::parquet::Type::BYTE_ARRAY;
+    auto nullable_type = make_nullable(std::make_shared<DataTypeString>());
+    auto reader = make_leaf_reader(binary_descriptor, nullable_type);
+    // Arrow array holding only the three non-null strings (dense layout).
+    arrow::BinaryBuilder dense_builder;
+    ASSERT_TRUE(dense_builder.Append("aa").ok());
+    ASSERT_TRUE(dense_builder.Append("cc").ok());
+    ASSERT_TRUE(dense_builder.Append("ee").ok());
+    std::shared_ptr<arrow::Array> dense_array;
+    ASSERT_TRUE(dense_builder.Finish(&dense_array).ok());
+
+    ParquetLeafBatch dense_batch;
+    dense_batch._read_dense_for_nullable = true;
+    dense_batch._value_kind = DecodedValueKind::BINARY;
+    dense_batch._values_written = 3;
+    dense_batch._binary_chunks = {dense_array};
+
+    const NullMap null_map = {0, 1, 0, 1, 0};
+    auto column = nullable_type->create_column();
+    auto status = reader.append_values(dense_batch, 5, &null_map, column);
+    ASSERT_TRUE(status.ok()) << status;
+    // Strings are expected on the non-null rows 0, 2 and 4; rows 1 and 3 remain null.
+    const auto& result = assert_cast<const ColumnNullable&>(*column);
+    const auto& texts = assert_cast<const ColumnString&>(result.get_nested_column());
+    ASSERT_EQ(result.size(), 5);
+    EXPECT_EQ(texts.get_data_at(0).to_string(), "aa");
+    EXPECT_TRUE(result.is_null_at(1));
+    EXPECT_EQ(texts.get_data_at(2).to_string(), "cc");
+    EXPECT_TRUE(result.is_null_at(3));
+    EXPECT_EQ(texts.get_data_at(4).to_string(), "ee");
+}
+
+TEST(ParquetLeafReaderTest, BinaryDenseNullableRejectsCountMismatch) {
+    ParquetTypeDescriptor binary_descriptor;
+    binary_descriptor.physical_type = ::parquet::Type::BYTE_ARRAY;
+    auto nullable_type = make_nullable(std::make_shared<DataTypeString>());
+    auto reader = make_leaf_reader(binary_descriptor, nullable_type);
+    // A single dense string cannot cover the two non-null rows of the null map below.
+    arrow::BinaryBuilder dense_builder;
+    ASSERT_TRUE(dense_builder.Append("only_one").ok());
+    std::shared_ptr<arrow::Array> dense_array;
+    ASSERT_TRUE(dense_builder.Finish(&dense_array).ok());
+
+    ParquetLeafBatch dense_batch;
+    dense_batch._read_dense_for_nullable = true;
+    dense_batch._value_kind = DecodedValueKind::BINARY;
+    dense_batch._values_written = 1;
+    dense_batch._binary_chunks = {dense_array};
+
+    const NullMap null_map = {0, 1, 0};
+    auto column = nullable_type->create_column();
+    auto status = reader.append_values(dense_batch, 3, &null_map, column);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid dense nullable parquet binary values"),
+              std::string::npos);
+}
+
+TEST(ParquetLeafReaderTest, DecodedColumnViewCarriesDescriptorSessionAndNullMapFields) {
+    ParquetTypeDescriptor descriptor;
+    descriptor.physical_type = ::parquet::Type::INT64;
+    descriptor.time_unit = ParquetTimeUnit::NANOS;
+    descriptor.decimal_precision = 18;
+    descriptor.decimal_scale = 4;
+    descriptor.fixed_length = 12;
+    descriptor.timestamp_is_adjusted_to_utc = true;
+    auto type = make_nullable(std::make_shared<DataTypeInt64>());
+    cctz::time_zone shanghai;
+    ASSERT_TRUE(cctz::load_time_zone("Asia/Shanghai", &shanghai));
+    // `captured` is handed to the spy reader and inspected after each append_values call.
+    CapturedDecodedView captured;
+    auto reader = make_spy_leaf_reader(descriptor, type, &captured, &shanghai, true);
+    const std::vector<int64_t> values = {100, 200, 300};
+    ParquetLeafBatch batch;
+    batch._value_kind = DecodedValueKind::INT64;
+    batch._fixed_values = reinterpret_cast<const uint8_t*>(values.data());
+    batch._values_written = values.size();
+    // Case 1: a non-empty null map should arrive together with the descriptor/session fields.
+    const NullMap null_map = {0, 1, 0};
+    auto column = type->create_column();
+    ASSERT_TRUE(reader.append_values(batch, 3, &null_map, column).ok());
+    EXPECT_EQ(captured.value_kind, DecodedValueKind::INT64);
+    EXPECT_EQ(captured.time_unit, DecodedTimeUnit::NANOS);
+    EXPECT_EQ(captured.row_count, 3);
+    EXPECT_EQ(captured.decimal_precision, 18);
+    EXPECT_EQ(captured.decimal_scale, 4);
+    EXPECT_EQ(captured.fixed_length, 12);
+    EXPECT_TRUE(captured.timestamp_is_adjusted_to_utc);
+    EXPECT_TRUE(captured.enable_strict_mode);
+    EXPECT_EQ(captured.timezone, &shanghai);
+    EXPECT_FALSE(captured.null_map_is_null);
+    EXPECT_EQ(captured.null_map, std::vector<uint8_t>({0, 1, 0}));
+    // Case 2: passing no null map (nullptr) is expected to be reported as an absent null map.
+    auto required_column = type->create_column();
+    ASSERT_TRUE(reader.append_values(batch, 3, nullptr, required_column).ok());
+    EXPECT_TRUE(captured.null_map_is_null);
+    // Case 3: an empty null map is expected to be treated like an absent one.
+    const NullMap empty_null_map;
+    ASSERT_TRUE(reader.append_values(batch, 3, &empty_null_map, required_column).ok());
+    EXPECT_TRUE(captured.null_map_is_null);
+}
+
+TEST(ParquetLeafReaderTest, DecodedColumnViewCapturesBinaryFixedLengthAndFloat16Override) {
+    ParquetTypeDescriptor binary_descriptor;
+    binary_descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY;
+    binary_descriptor.fixed_length = 4;
+    auto type = std::make_shared<DataTypeString>();
+    // Part 1: plain FIXED_LEN_BYTE_ARRAY values should reach the view as 4-byte strings.
+    CapturedDecodedView binary_view;
+    auto binary_reader = make_spy_leaf_reader(binary_descriptor, type, &binary_view);
+    ParquetLeafBatch binary_batch;
+    binary_batch._value_kind = DecodedValueKind::FIXED_BINARY;
+    binary_batch._binary_chunks = {fixed_binary_array({"abcd", "wxyz"}, 4)};
+    binary_batch._values_written = 2;
+    auto binary_column = type->create_column();
+    ASSERT_TRUE(binary_reader.append_values(binary_batch, 2, nullptr, binary_column).ok());
+    EXPECT_EQ(binary_view.value_kind, DecodedValueKind::FIXED_BINARY);
+    EXPECT_EQ(binary_view.fixed_length, 4);
+    ASSERT_EQ(binary_view.owned_binary_values.size(), 2);
+    EXPECT_EQ(binary_view.owned_binary_values[0], "abcd");
+    EXPECT_EQ(binary_view.owned_binary_values[1], "wxyz");
+    // Part 2: FLOAT16 input should be exposed as FLOAT with float-sized fixed values.
+    ParquetTypeDescriptor float16_descriptor;
+    float16_descriptor.physical_type = ::parquet::Type::FIXED_LEN_BYTE_ARRAY;
+    float16_descriptor.extra_type_info = ParquetExtraTypeInfo::FLOAT16;
+    float16_descriptor.fixed_length = 2;
+    CapturedDecodedView float16_view;
+    auto float16_reader = make_spy_leaf_reader(float16_descriptor,
+                                               std::make_shared<DataTypeFloat32>(), &float16_view);
+    auto half = [](uint16_t value) {
+        std::string bytes(sizeof(value), '\0');
+        memcpy(bytes.data(), &value, sizeof(value));
+        return bytes;
+    };
+    ParquetLeafBatch float16_batch;
+    float16_batch._value_kind = DecodedValueKind::FIXED_BINARY;
+    float16_batch._binary_chunks = {fixed_binary_array({half(0x3E00), half(0x4000)}, 2)};
+    float16_batch._values_written = 2;
+    auto float16_column = std::make_shared<DataTypeFloat32>()->create_column();
+    ASSERT_TRUE(float16_reader.append_values(float16_batch, 2, nullptr, float16_column).ok());
+    EXPECT_EQ(float16_view.value_kind, DecodedValueKind::FLOAT);
+    ASSERT_EQ(float16_view.fixed_values.size(), sizeof(float) * 2);
+    const auto* floats = reinterpret_cast<const float*>(float16_view.fixed_values.data());
+    EXPECT_FLOAT_EQ(floats[0], 1.5F);
+    EXPECT_FLOAT_EQ(floats[1], 2.0F);
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchValueLayoutLevels) {
+    // One value per level: each level is expected to map to its own value index.
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> defs = {2, 2, 2};
+    const std::vector<int16_t> reps = {0, 1, 0};
+    const std::vector<int32_t> values = {10, 20, 30};
+    const auto leaf_input = ParquetLeafReaderTestAccess::make_fixed_batch(defs, reps, values);
+
+    ParquetNestedScalarBatch scalar_batch;
+    auto result = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_input, 2, 2, 1,
+                                                                  &scalar_batch);
+    ASSERT_TRUE(result.ok()) << result;
+    EXPECT_EQ(scalar_batch.records_read, 2);
+    EXPECT_EQ(scalar_batch.levels_written, 3);
+    EXPECT_EQ(scalar_batch.value_indices, std::vector<int64_t>({0, 1, 2}));
+    const auto& built_values = assert_cast<const ColumnInt32&>(*scalar_batch.values_column);
+    ASSERT_EQ(built_values.size(), 3);
+    EXPECT_EQ(built_values.get_element(0), 10);
+    EXPECT_EQ(built_values.get_element(2), 30);
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchValueLayoutValueSlots) {
+    // Three values for four levels: the def==1 level owns slot 1 (777) but must report -1.
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> defs = {2, 1, 2, 0};
+    const std::vector<int16_t> reps = {0, 1, 0, 0};
+    const std::vector<int32_t> values = {10, 777, 30};
+    const auto leaf_input = ParquetLeafReaderTestAccess::make_fixed_batch(defs, reps, values);
+
+    ParquetNestedScalarBatch scalar_batch;
+    auto result = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_input, 3, 1, 1,
+                                                                  &scalar_batch);
+    ASSERT_TRUE(result.ok()) << result;
+    EXPECT_EQ(scalar_batch.value_indices, std::vector<int64_t>({0, -1, 2, -1}));
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchValueLayoutLeafValues) {
+    // Two values for the two def==2 levels: the indices are expected to be compact (0, 1).
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> defs = {2, 1, 2, 0};
+    const std::vector<int16_t> reps = {0, 1, 0, 0};
+    const std::vector<int32_t> values = {10, 30};
+    const auto leaf_input = ParquetLeafReaderTestAccess::make_fixed_batch(defs, reps, values);
+
+    ParquetNestedScalarBatch scalar_batch;
+    auto result = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_input, 3, 1, 1,
+                                                                  &scalar_batch);
+    ASSERT_TRUE(result.ok()) << result;
+    EXPECT_EQ(scalar_batch.value_indices, std::vector<int64_t>({0, -1, 1, -1}));
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchValueLayoutPayloadSlots) {
+    // Three values for the three def>=1 levels: slot 0 (777) belongs to a def==1 level -> -1.
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> defs = {1, 2, 0, 2};
+    const std::vector<int16_t> reps = {0, 0, 0, 0};
+    const std::vector<int32_t> values = {777, 10, 30};
+    const auto leaf_input = ParquetLeafReaderTestAccess::make_fixed_batch(defs, reps, values);
+
+    ParquetNestedScalarBatch scalar_batch;
+    auto result = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_input, 4, 2, 1,
+                                                                  &scalar_batch);
+    ASSERT_TRUE(result.ok()) << result;
+    EXPECT_EQ(scalar_batch.value_indices, std::vector<int64_t>({-1, 1, -1, 2}));
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchRejectsMismatchedValueLayout) {
+    // Three values fit neither the four levels nor the two defined ones, so an error is expected.
+    auto descriptor = int32_column_descriptor(2, 1);
+    auto reader = make_nested_leaf_reader(descriptor, std::make_shared<DataTypeInt32>());
+    const std::vector<int16_t> defs = {2, 0, 2, 0};
+    const std::vector<int16_t> reps = {0, 0, 0, 0};
+    const std::vector<int32_t> values = {10, 20, 30};
+    const auto leaf_input = ParquetLeafReaderTestAccess::make_fixed_batch(defs, reps, values);
+
+    ParquetNestedScalarBatch scalar_batch;
+    const auto result = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_input, 4, 2, 1,
+                                                                        &scalar_batch);
+    EXPECT_FALSE(result.ok());
+    EXPECT_NE(result.to_string().find("inconsistent value count"), std::string::npos);
+}
+
+TEST(ParquetLeafReaderTest, NestedBatchRejectsDenseNullable) {
+    // The batch is flagged read_dense_for_nullable, which the nested builder should refuse.
+    auto descriptor = int32_column_descriptor(1, 0);
+    auto reader =
+            make_nested_leaf_reader(descriptor, make_nullable(std::make_shared<DataTypeInt32>()));
+    const std::vector<int16_t> defs = {1};
+    const std::vector<int16_t> reps = {0};
+    const std::vector<int32_t> values = {10};
+    const auto leaf_input = ParquetLeafReaderTestAccess::make_fixed_batch(defs, reps, values, true);
+
+    ParquetNestedScalarBatch scalar_batch;
+    const auto result = ParquetLeafReaderTestAccess::build_nested_batch(reader, leaf_input, 1, 0, 0,
+                                                                        &scalar_batch);
+    EXPECT_FALSE(result.ok());
+    EXPECT_NE(result.to_string().find("Dense nullable parquet nested reader is not supported"),
+              std::string::npos);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_page_cache_range_test.cpp b/be/test/format_v2/parquet/parquet_page_cache_range_test.cpp
new file mode 100644
index 00000000000000..f8e12206bb1220
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_page_cache_range_test.cpp
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "format_v2/parquet/parquet_file_context.h"
+
+namespace doris::format::parquet {
+namespace {
+
+void expect_plan_entry(const ParquetPageCacheReadPlanEntry& entry,
+                       const ParquetPageCacheRange& cached_range, int64_t copy_offset_in_cache,
+                       int64_t output_offset, int64_t copy_size) {
+    EXPECT_EQ(entry.cached_range.offset, cached_range.offset); // Range is compared field by field.
+    EXPECT_EQ(entry.cached_range.size, cached_range.size);
+    EXPECT_EQ(entry.copy_offset_in_cache, copy_offset_in_cache);
+    EXPECT_EQ(entry.output_offset, output_offset);
+    EXPECT_EQ(entry.copy_size, copy_size); // Every check above is a non-fatal EXPECT.
+}
+
+TEST(ParquetPageCacheRangeTest, SubsetRequestHitsSingleCachedRange) {
+    const std::vector<ParquetPageCacheRange> cache_entries = {
+            {100, 100},
+    };
+
+    // The request [120, 150) lies inside the single cached entry [100, 200): the plan must
+    // name that exact entry and copy size 30 from cache offset 20 to output offset 0.
+    auto copy_plan = detail::plan_page_cache_range_read(120, 30, cache_entries);
+
+    ASSERT_EQ(copy_plan.size(), 1);
+    expect_plan_entry(copy_plan[0], {100, 100}, 20, 0, 30);
+}
+
+TEST(ParquetPageCacheRangeTest, SupersetRequestHitsMultipleAdjacentCachedRanges) {
+    const std::vector<ParquetPageCacheRange> cache_entries = {
+            {180, 80},
+            {100, 80},
+    };
+
+    // Neither cached entry alone covers the request [100, 260), but [100, 180) followed by
+    // [180, 260) does, so the plan is expected to chain both entries in offset order.
+    auto copy_plan = detail::plan_page_cache_range_read(100, 160, cache_entries);
+
+    ASSERT_EQ(copy_plan.size(), 2);
+    expect_plan_entry(copy_plan[0], {100, 80}, 0, 0, 80);
+    expect_plan_entry(copy_plan[1], {180, 80}, 0, 80, 80);
+}
+
+TEST(ParquetPageCacheRangeTest, SupersetRequestCanUseOverlappingCachedRanges) {
+    const std::vector<ParquetPageCacheRange> cache_entries = {
+            {150, 110},
+            {100, 100},
+    };
+
+    // The request [100, 260) is covered by two overlapping entries. [100, 200) is copied
+    // first; the tail then continues at position 200, i.e. offset 50 inside [150, 260).
+    auto copy_plan = detail::plan_page_cache_range_read(100, 160, cache_entries);
+
+    ASSERT_EQ(copy_plan.size(), 2);
+    expect_plan_entry(copy_plan[0], {100, 100}, 0, 0, 100);
+    expect_plan_entry(copy_plan[1], {150, 110}, 50, 100, 60);
+}
+
+TEST(ParquetPageCacheRangeTest, PartialOverlapWithoutFullCoverageMisses) {
+    const std::vector<ParquetPageCacheRange> cache_entries = {
+            {100, 80},
+            {200, 60},
+    };
+
+    // The cached entries span [100, 180) and [200, 260), leaving the gap [180, 200).
+    // A partially cached answer is not acceptable: the planner must report a miss so
+    // that the caller reads the entire request from the file.
+    auto copy_plan = detail::plan_page_cache_range_read(100, 160, cache_entries);
+
+    EXPECT_TRUE(copy_plan.empty());
+}
+
+TEST(ParquetPageCacheRangeTest, NonCoveringAndInvalidRangesAreIgnored) {
+    const std::vector<ParquetPageCacheRange> cache_entries = {
+            {50, 20}, {100, 0}, {100, -1}, {180, 20}, {120, 30},
+    };
+
+    // Of all entries only [120, 150) overlaps the request [100, 150); the head [100, 120)
+    // stays uncovered, so an empty plan (a miss) is expected.
+    auto copy_plan = detail::plan_page_cache_range_read(100, 50, cache_entries);
+
+    EXPECT_TRUE(copy_plan.empty());
+}
+
+TEST(ParquetPageCacheRangeTest, InvalidRequestMisses) {
+    const std::vector<ParquetPageCacheRange> cache_entries = {
+            {100, 100},
+    };
+    // A negative offset, a zero size and a negative size must each yield an empty plan.
+    EXPECT_TRUE(detail::plan_page_cache_range_read(-1, 10, cache_entries).empty());
+    EXPECT_TRUE(detail::plan_page_cache_range_read(100, 0, cache_entries).empty());
+    EXPECT_TRUE(detail::plan_page_cache_range_read(100, -1, cache_entries).empty());
+}
+
+} // namespace
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_reader_control_test.cpp b/be/test/format_v2/parquet/parquet_reader_control_test.cpp
new file mode 100644
index 00000000000000..c7d430350d1b26
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_reader_control_test.cpp
@@ -0,0 +1,1034 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/column/column_array.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "format_v2/column_data.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_statistics.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/parquet/reader/global_rowid_column_reader.h"
+#include "format_v2/parquet/reader/list_column_reader.h"
+#include "format_v2/parquet/reader/map_column_reader.h"
+#include "format_v2/parquet/reader/nested_column_materializer.h"
+#include "format_v2/parquet/reader/row_position_column_reader.h"
+#include "format_v2/parquet/reader/scalar_column_reader.h"
+#include "format_v2/parquet/reader/struct_column_reader.h"
+#include "format_v2/parquet/selection_vector.h"
+#include "storage/utils.h"
+
+namespace doris::format::parquet {
+namespace {
+
+ParquetColumnSchema int64_schema(std::string name = "mock") {
+    ParquetColumnSchema column; // Int64 schema with local id 0; no other field is assigned.
+    column.type = std::make_shared<DataTypeInt64>();
+    column.name = std::move(name);
+    column.local_id = 0;
+    return column;
+}
+
+ParquetColumnSchema nested_int64_schema(std::string name, int16_t nullable_definition_level,
+                                        int16_t definition_level, int16_t repetition_level = 0,
+                                        int16_t repeated_ancestor_definition_level = 0) {
+    auto leaf = int64_schema(std::move(name)); // Plain Int64 schema, made nullable below.
+    leaf.type = make_nullable(std::make_shared<DataTypeInt64>());
+    leaf.repeated_ancestor_definition_level = repeated_ancestor_definition_level;
+    leaf.repeated_repetition_level = repetition_level;
+    leaf.repetition_level = repetition_level;
+    leaf.definition_level = definition_level;
+    leaf.nullable_definition_level = nullable_definition_level;
+    return leaf;
+}
+
+ParquetColumnSchema nested_struct_schema() {
+    ParquetColumnSchema node;
+    node.kind = ParquetColumnSchemaKind::STRUCT;
+    node.name = "struct";
+    node.local_id = 0;
+    node.definition_level = 2;
+    node.nullable_definition_level = 1;
+    DataTypes fields = {make_nullable(std::make_shared<DataTypeInt64>()),
+                        make_nullable(std::make_shared<DataTypeInt64>())};
+    // Nullable struct<a: nullable Int64, b: nullable Int64>; no child schemas are attached.
+    node.type = make_nullable(std::make_shared<DataTypeStruct>(fields, Strings {"a", "b"}));
+    return node;
+}
+
+ParquetColumnSchema nested_list_schema(std::string name, DataTypePtr element_type,
+                                       int16_t nullable_definition_level, int16_t definition_level,
+                                       int16_t repetition_level,
+                                       int16_t repeated_ancestor_definition_level) {
+    ParquetColumnSchema list_schema;
+    list_schema.kind = ParquetColumnSchemaKind::LIST;
+    list_schema.name = std::move(name);
+    list_schema.local_id = 0;
+    list_schema.type = make_nullable(std::make_shared<DataTypeArray>(std::move(element_type)));
+    list_schema.nullable_definition_level = nullable_definition_level;
+    list_schema.definition_level = definition_level;
+    list_schema.repetition_level = repetition_level;
+    list_schema.repeated_repetition_level = repetition_level; // Same value as repetition_level.
+    list_schema.repeated_ancestor_definition_level = repeated_ancestor_definition_level;
+    return list_schema;
+}
+
+ParquetColumnSchema nested_map_schema(
+        DataTypePtr value_type = make_nullable(std::make_shared<DataTypeInt64>())) {
+    ParquetColumnSchema node;
+    node.kind = ParquetColumnSchemaKind::MAP;
+    node.name = "map";
+    node.local_id = 0;
+    node.definition_level = 2;
+    node.nullable_definition_level = 1;
+    node.repeated_ancestor_definition_level = 2;
+    node.repetition_level = 1; // repeated_repetition_level is not assigned by this helper.
+    auto key_type = make_nullable(std::make_shared<DataTypeInt64>());
+    node.type = make_nullable(std::make_shared<DataTypeMap>(key_type, std::move(value_type)));
+    return node;
+}
+
+ParquetColumnSchema bare_repeated_int64_list_schema() {
+    ParquetColumnSchema node; // Non-nullable array<Int64>; nullable_definition_level is not set.
+    node.kind = ParquetColumnSchemaKind::LIST;
+    node.name = "repeated";
+    node.local_id = 0;
+    node.type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt64>());
+    node.repeated_ancestor_definition_level = 1;
+    node.repeated_repetition_level = 1;
+    node.repetition_level = 1;
+    node.definition_level = 1;
+    return node;
+}
+
+std::unique_ptr<ParquetColumnSchema> primitive_child(int local_id, std::string name,
+                                                     DataTypePtr type) {
+    auto leaf = std::make_unique<ParquetColumnSchema>();
+    leaf->kind = ParquetColumnSchemaKind::PRIMITIVE;
+    leaf->name = std::move(name);
+    leaf->local_id = local_id;
+    leaf->leaf_column_id = local_id;
+    leaf->type_descriptor.physical_type = ::parquet::Type::INT32; // INT32 for every Doris type.
+    leaf->type_descriptor.doris_type = type;
+    leaf->type = std::move(type);
+    return leaf;
+}
+
+ParquetColumnSchema struct_schema_for_projection() {
+    ParquetColumnSchema root;
+    root.kind = ParquetColumnSchemaKind::STRUCT;
+    root.name = "s";
+    root.local_id = 0;
+    root.children.push_back(primitive_child(0, "a", std::make_shared<DataTypeInt32>()));
+    root.children.push_back(primitive_child(1, "b", std::make_shared<DataTypeInt32>()));
+    Strings field_names = {"a", "b"}; // The struct type wraps each child type as nullable.
+    DataTypes field_types = {make_nullable(root.children[0]->type),
+                             make_nullable(root.children[1]->type)};
+    root.type = std::make_shared<DataTypeStruct>(field_types, field_names);
+    return root;
+}
+
+ParquetColumnSchema list_schema_for_projection() {
+    ParquetColumnSchema root; // LIST "xs" over a single non-nullable Int32 "element" child.
+    root.kind = ParquetColumnSchemaKind::LIST;
+    root.name = "xs";
+    root.local_id = 0;
+    root.children.push_back(primitive_child(0, "element", std::make_shared<DataTypeInt32>()));
+    root.type = std::make_shared<DataTypeArray>(root.children[0]->type);
+    return root;
+}
+
+ParquetColumnSchema map_schema_for_projection() {
+    ParquetColumnSchema root; // MAP "m": String key and Int32 value, both nullable in the type.
+    root.kind = ParquetColumnSchemaKind::MAP;
+    root.name = "m";
+    root.local_id = 0;
+    root.children.push_back(primitive_child(0, "key", std::make_shared<DataTypeString>()));
+    root.children.push_back(primitive_child(1, "value", std::make_shared<DataTypeInt32>()));
+    root.type = std::make_shared<DataTypeMap>(make_nullable(root.children[0]->type),
+                                              make_nullable(root.children[1]->type));
+    return root;
+}
+
+class CursorColumnReader final : public ParquetColumnReader { // Mock reader with a row cursor.
+public:
+    CursorColumnReader() : ParquetColumnReader(int64_schema(), std::make_shared<DataTypeInt64>()) {}
+    // Appends `rows` consecutive values starting at the cursor, then advances the cursor.
+    Status read(int64_t rows, MutableColumnPtr& column, int64_t* rows_read) override {
+        if (column.get() == nullptr || rows_read == nullptr) {
+            return Status::InvalidArgument("invalid mock read arguments");
+        }
+        auto* values = assert_cast<ColumnInt64*>(column.get());
+        for (int64_t row = 0; row < rows; ++row) {
+            values->insert_value(_cursor + row);
+        }
+        _read_lengths.push_back(rows);
+        _cursor += rows;
+        *rows_read = rows;
+        return Status::OK();
+    }
+    // Advances the cursor by `rows` without producing values; the length is recorded.
+    Status skip(int64_t rows) override {
+        _skip_lengths.push_back(rows);
+        _cursor += rows;
+        return Status::OK();
+    }
+    // Accessors for the cursor and the recorded skip/read lengths, in call order.
+    int64_t cursor() const { return _cursor; }
+    const std::vector<int64_t>& skip_lengths() const { return _skip_lengths; }
+    const std::vector<int64_t>& read_lengths() const { return _read_lengths; }
+
+private:
+    int64_t _cursor = 0;
+    std::vector<int64_t> _skip_lengths;
+    std::vector<int64_t> _read_lengths;
+};
+
+class NestedBuildReader final : public ParquetColumnReader { // Mock for build_nested_column.
+public:
+    explicit NestedBuildReader(int64_t values_to_build)
+            : ParquetColumnReader(int64_schema("nested"), std::make_shared<DataTypeInt64>()),
+              _values_to_build(values_to_build) {}
+    // Flat reads are not implemented by this mock.
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override {
+        return Status::NotSupported("unused");
+    }
+    // Appends 0.._values_to_build-1 regardless of the bound, which is only recorded.
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override {
+        if (column.get() == nullptr || values_read == nullptr) {
+            return Status::InvalidArgument("invalid mock nested build arguments");
+        }
+        _last_length_upper_bound = length_upper_bound;
+        auto* values = assert_cast<ColumnInt64*>(column.get());
+        for (int64_t value = 0; value < _values_to_build; ++value) {
+            values->insert_value(value);
+        }
+        *values_read = _values_to_build;
+        return Status::OK();
+    }
+    // Bound seen by the latest build call that passed argument validation (0 before any).
+    int64_t last_length_upper_bound() const { return _last_length_upper_bound; }
+
+private:
+    int64_t _values_to_build = 0;
+    int64_t _last_length_upper_bound = 0;
+};
+
+class ScriptedNestedReader final : public ParquetColumnReader { // mock with scripted levels
+public:
+    ScriptedNestedReader(ParquetColumnSchema schema, DataTypePtr type,
+                         std::vector<int16_t> def_levels, std::vector<int16_t> rep_levels,
+                         bool has_repeated_child = false, bool build_nulls = false)
+            : ParquetColumnReader(std::move(schema), std::move(type)), // sink param: move it
+              _def_levels(std::move(def_levels)),
+              _rep_levels(std::move(rep_levels)),
+              _has_repeated_child(has_repeated_child),
+              _build_nulls(build_nulls) {}
+    // The flat read path is not supported by this mock.
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override {
+        return Status::NotSupported("unused");
+    }
+    // Only records the requested row count; the levels are fixed at construction time.
+    Status load_nested_batch(int64_t rows) override {
+        _load_lengths.push_back(rows);
+        return Status::OK();
+    }
+    // Logs the request (even when it is then rejected) and appends `length_upper_bound` slots.
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override {
+        _build_lengths.push_back(length_upper_bound);
+        if (column.get() == nullptr || values_read == nullptr) {
+            return Status::InvalidArgument("invalid scripted nested build arguments");
+        }
+        for (int64_t row = 0; row < length_upper_bound; ++row) {
+            insert_value(column, _next_value++, _build_nulls);
+        }
+        *values_read = length_upper_bound;
+        return Status::OK();
+    }
+    // The scripted level vectors are returned verbatim; levels written == definition level count.
+    const std::vector<int16_t>& nested_definition_levels() const override { return _def_levels; }
+    const std::vector<int16_t>& nested_repetition_levels() const override { return _rep_levels; }
+    int64_t nested_levels_written() const override {
+        return static_cast<int64_t>(_def_levels.size());
+    }
+    bool is_or_has_repeated_child() const override { return _has_repeated_child; }
+
+    const std::vector<int64_t>& build_lengths() const { return _build_lengths; }
+
+private:
+    static void insert_value(MutableColumnPtr& column, int64_t value, bool is_null) {
+        if (auto* nullable_column = check_and_get_column<ColumnNullable>(*column);
+            nullable_column != nullptr) {
+            if (is_null) {
+                nullable_column->insert_default(); // null slot requested by the script
+                return;
+            }
+            assert_cast<ColumnInt64&>(nullable_column->get_nested_column()).insert_value(value);
+            nullable_column->get_null_map_data().push_back(0); // 0 marks the slot as non-null
+            return;
+        }
+        assert_cast<ColumnInt64&>(*column).insert_value(value); // non-nullable: is_null is ignored
+    }
+
+    std::vector<int16_t> _def_levels;
+    std::vector<int16_t> _rep_levels;
+    bool _has_repeated_child = false;
+    bool _build_nulls = false;
+    int64_t _next_value = 0; // next value to emit; keeps counting across build calls
+    std::vector<int64_t> _load_lengths; // recorded only; this class exposes no accessor for it
+    std::vector<int64_t> _build_lengths;
+};
+
+} // namespace
+
+struct ScalarColumnReaderTestAccess { // test-only access to ScalarColumnReader internals
+    static void set_nested_batch(ScalarColumnReader* reader,
+                                 std::unique_ptr<ParquetNestedScalarBatch> batch) {
+        reader->_nested_batch = std::move(batch);
+    }
+    // Forwards to ScalarColumnReader::page_filtered_rows_to_skip(rows).
+    static int64_t page_filtered_rows_to_skip(const ScalarColumnReader& reader, int64_t rows) {
+        return reader.page_filtered_rows_to_skip(rows);
+    }
+    // Overwrites the reader's _row_group_rows_read member.
+    static void set_row_group_rows_read(ScalarColumnReader* reader, int64_t rows) {
+        reader->_row_group_rows_read = rows;
+    }
+};
+
+namespace {
+
+std::unique_ptr<ScalarColumnReader> make_scripted_scalar_reader(
+        ParquetColumnSchema schema, std::unique_ptr<ParquetNestedScalarBatch> batch) {
+    auto scripted = std::make_unique<ScalarColumnReader>(schema, nullptr);
+    ScalarColumnReaderTestAccess::set_nested_batch(scripted.get(), std::move(batch));
+    return scripted; // reader whose _nested_batch is the caller-supplied scripted batch
+}
+
+std::unique_ptr<ParquetNestedScalarBatch> scalar_batch(std::vector<int16_t> def_levels,
+                                                       std::vector<int16_t> rep_levels,
+                                                       std::vector<int64_t> value_indices,
+                                                       std::vector<int64_t> values) {
+    auto payload = ColumnInt64::create();
+    for (size_t i = 0; i < values.size(); ++i) {
+        payload->insert_value(values[i]);
+    }
+    auto result = std::make_unique<ParquetNestedScalarBatch>();
+    result->values_column = std::move(payload);
+    result->levels_written = static_cast<int64_t>(def_levels.size()); // read size before the move
+    result->def_levels = std::move(def_levels);
+    result->rep_levels = std::move(rep_levels);
+    result->value_indices = std::move(value_indices);
+    return result;
+}
+
+class DefaultOnlyReader final : public ParquetColumnReader { // mock for base-class defaults
+public:
+    DefaultOnlyReader()
+            : ParquetColumnReader(int64_schema("default_only"), std::make_shared<DataTypeInt64>()) {
+    }
+    // read() is the sole override, so every other virtual falls through to ParquetColumnReader.
+    Status read(int64_t, MutableColumnPtr&, int64_t*) override {
+        return Status::NotSupported("unused");
+    }
+};
+
+GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) {
+    const auto ref = column.get_data_at(row); // raw bytes of the cell at `row`
+    EXPECT_EQ(ref.size, sizeof(GlobalRowLoacationV2)); // non-fatal: the helper keeps running
+    GlobalRowLoacationV2 location(0, 0, 0, 0); // zeroed fallback; copy below is clamped to ref.size
+    std::memcpy(&location, ref.data, ref.size < sizeof(location) ? ref.size : sizeof(location));
+    return location;
+}
+
+} // namespace
+
+TEST(SelectionVectorTest, IdentitySelectionToRanges) {
+    SelectionVector selection; // default-constructed; expected to behave as an identity selection
+    const auto ranges = selection_to_ranges(selection, 5);
+    ASSERT_EQ(ranges.size(), 1); // five selected rows collapse into one contiguous range
+    EXPECT_EQ(ranges[0].start, 0);
+    EXPECT_EQ(ranges[0].length, 5);
+    EXPECT_TRUE(selection.verify(5, 5).ok());
+}
+
+TEST(SelectionVectorTest, ExternalBufferSelectionToRanges) {
+    SelectionVector::Index indices[] = {0, 1, 4, 6, 7}; // contiguous runs: {0,1}, {4}, {6,7}
+    SelectionVector selection(indices, std::size(indices)); // constructed from pointer + count
+    const auto ranges = selection_to_ranges(selection, std::size(indices));
+    ASSERT_EQ(ranges.size(), 3);
+    EXPECT_EQ(ranges[0].start, 0);
+    EXPECT_EQ(ranges[0].length, 2);
+    EXPECT_EQ(ranges[1].start, 4);
+    EXPECT_EQ(ranges[1].length, 1);
+    EXPECT_EQ(ranges[2].start, 6);
+    EXPECT_EQ(ranges[2].length, 2);
+    EXPECT_TRUE(selection.verify(std::size(indices), 8).ok()); // 8: presumably the batch size
+}
+
+TEST(SelectionVectorTest, VerifyRejectsInvalidSelection) {
+    SelectionVector selection(2);
+    EXPECT_FALSE(selection.verify(3, 3).ok()); // asks for 3 rows from a vector constructed with 2
+    EXPECT_FALSE(selection.verify(1, -1).ok()); // negative second argument
+    // Indices 2 then 1 are in descending order.
+    selection.set_index(0, 2);
+    selection.set_index(1, 1);
+    EXPECT_FALSE(selection.verify(2, 3).ok());
+    // Index 3 with a second verify() argument of 3; presumably rejected as out of range.
+    selection.set_index(0, 0);
+    selection.set_index(1, 3);
+    EXPECT_FALSE(selection.verify(2, 3).ok());
+}
+
+TEST(ParquetColumnReaderControlTest, BaseSelectUsesSkipReadRanges) {
+    CursorColumnReader reader;
+    SelectionVector selection(3);
+    selection.set_index(0, 0);
+    selection.set_index(1, 2);
+    selection.set_index(2, 4);
+    // Select indices 0, 2 and 4; the cursor assertion below shows 6 rows are consumed in total.
+    auto column = std::make_shared<DataTypeInt64>()->create_column();
+    ASSERT_TRUE(reader.select(selection, 3, 6, column).ok());
+    // CursorColumnReader emits its cursor, so the values equal the selected row indices.
+    const auto& values = assert_cast<const ColumnInt64&>(*column);
+    ASSERT_EQ(values.size(), 3);
+    EXPECT_EQ(values.get_element(0), 0);
+    EXPECT_EQ(values.get_element(1), 2);
+    EXPECT_EQ(values.get_element(2), 4);
+    EXPECT_EQ(reader.cursor(), 6);
+    EXPECT_EQ(reader.read_lengths(), std::vector<int64_t>({1, 1, 1}));
+    EXPECT_EQ(reader.skip_lengths(), std::vector<int64_t>({0, 1, 1, 1})); // first skip is empty
+}
+
+TEST(ParquetColumnReaderControlTest, BaseSelectZeroRowsConsumesBatch) {
+    CursorColumnReader reader;
+    SelectionVector selection;
+    auto column = std::make_shared<DataTypeInt64>()->create_column();
+    ASSERT_TRUE(reader.select(selection, 0, 4, column).ok()); // zero rows selected, 4 passed as size
+    EXPECT_EQ(column->size(), 0);
+    EXPECT_EQ(reader.cursor(), 4); // the cursor still advances past all four rows
+    EXPECT_TRUE(reader.read_lengths().empty());
+    EXPECT_EQ(reader.skip_lengths(), std::vector<int64_t>({4})); // one skip covers the batch
+}
+
+TEST(ParquetColumnReaderControlTest, BaseNestedDefaultsAndSkipNested) {
+    DefaultOnlyReader base_reader; // overrides read() only
+    EXPECT_FALSE(base_reader.skip(1).ok());
+    EXPECT_FALSE(base_reader.load_nested_batch(1).ok());
+
+    auto column = std::make_shared<DataTypeInt64>()->create_column();
+    int64_t values_read = 0;
+    EXPECT_FALSE(base_reader.build_nested_column(1, column, &values_read).ok());
+    // A reader that builds exactly 3 values: skipping 3 succeeds and 3 is recorded as the bound.
+    NestedBuildReader ok_reader(3);
+    ASSERT_TRUE(ok_reader.skip_nested_column(3).ok());
+    EXPECT_EQ(ok_reader.last_length_upper_bound(), 3);
+    // A reader that builds only 2 values: skipping 3 is expected to fail.
+    NestedBuildReader short_reader(2);
+    EXPECT_FALSE(short_reader.skip_nested_column(3).ok());
+}
+
+TEST(ParquetColumnReaderControlTest, NestedMaterializerHelpersAppendOffsetsAndParentNulls) {
+    ColumnArray::Offsets64 offsets;
+    append_offsets(offsets, {3, 0, 2}); // lengths 3, 0, 2 -> running totals 3, 3, 5
+    ASSERT_EQ(offsets.size(), 3);
+    EXPECT_EQ(offsets[0], 3);
+    EXPECT_EQ(offsets[1], 3);
+    EXPECT_EQ(offsets[2], 5);
+    append_offsets(offsets, {1, 4}); // continues from the previous last offset (5)
+    ASSERT_EQ(offsets.size(), 5);
+    EXPECT_EQ(offsets[3], 6);
+    EXPECT_EQ(offsets[4], 10);
+    // A null destination is passed first; the call is expected to be tolerated.
+    const NullMap parent_nulls = {0, 1, 0};
+    append_parent_nulls(nullptr, parent_nulls);
+    NullMap dst = {1};
+    append_parent_nulls(&dst, parent_nulls);
+    EXPECT_EQ(dst, NullMap({1, 0, 1, 0})); // existing entry kept, parent flags appended
+}
+
+TEST(ParquetColumnReaderControlTest, PageFilteredRowsToSkipUsesOnlyFullSkippedRanges) {
+    ParquetPageSkipPlan page_skip_plan;
+    page_skip_plan.skipped_ranges = {RowRange {0, 3}, RowRange {5, 2}, RowRange {10, 4}};
+    // NOTE(review): the expectations below read each range as {start, length} — confirm.
+    auto schema = nested_int64_schema("page_filtered", 0, 0);
+    ScalarColumnReader reader(schema, nullptr, &page_skip_plan);
+    EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 3), 3);
+    EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 5), 3);
+    // Set the reader's rows-read counter to 5, the first value of the second skipped range.
+    ScalarColumnReaderTestAccess::set_row_group_rows_read(&reader, 5);
+    EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 2), 2);
+    EXPECT_EQ(ScalarColumnReaderTestAccess::page_filtered_rows_to_skip(reader, 5), 2);
+}
+
+TEST(ParquetColumnReaderControlTest, StructSkipsNullParentForRepeatedChildAndBatchesPresentRows) {
+    auto repeated_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("repeated_shape", 1, 2, 1),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {2, 2, 2, 2},
+            std::vector<int16_t> {0, 0, 0, 0}, true); // has_repeated_child = true
+    auto* repeated_child_ptr = repeated_child.get();
+    auto scalar_child = make_scripted_scalar_reader(
+            nested_int64_schema("scalar_child", 1, 2),
+            scalar_batch({2, 0, 2, 2}, {0, 0, 0, 0}, {0, -1, 1, 2}, {10, 20, 30}));
+    auto* scalar_child_ptr = scalar_child.get();
+    // Only the scalar child scripts a definition level of 0, and only for row 1.
+    std::vector<std::unique_ptr<ParquetColumnReader>> children;
+    children.push_back(std::move(repeated_child));
+    children.push_back(std::move(scalar_child));
+    StructColumnReader reader(nested_struct_schema(),
+                              make_nullable(std::make_shared<DataTypeStruct>(
+                                      DataTypes {make_nullable(std::make_shared<DataTypeInt64>()),
+                                                 make_nullable(std::make_shared<DataTypeInt64>())},
+                                      Strings {"a", "b"})),
+                              std::move(children), {0, 1});
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(4, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 4);
+    // Expect row 1 null, and the repeated child built in two calls: 1 row, then 2 rows.
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 4);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_FALSE(nullable_column.is_null_at(3));
+    EXPECT_EQ(repeated_child_ptr->build_lengths(), std::vector<int64_t>({1, 2}));
+    EXPECT_EQ(scalar_child_ptr->nested_build_level_cursor(), 4); // all four levels consumed
+}
+
+TEST(ParquetColumnReaderControlTest, StructFallsBackToFirstChildWhenAllChildrenAreRepeated) {
+    auto first_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("first", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 0}, std::vector<int16_t> {0, 0}, true);
+    auto second_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("second", 1, 2, 1),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {2, 2},
+            std::vector<int16_t> {0, 0}, true);
+    // Both children report has_repeated_child; only the first scripts a 0 definition level.
+    std::vector<std::unique_ptr<ParquetColumnReader>> children;
+    children.push_back(std::move(first_child));
+    children.push_back(std::move(second_child));
+    StructColumnReader reader(nested_struct_schema(), nested_struct_schema().type,
+                              std::move(children), {0, 1});
+    // Struct nullness is expected to follow the first child's levels: row 1 is null.
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(2, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(rows_read, 2);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_TRUE(nullable_column.is_null_at(1));
+}
+
+TEST(ParquetColumnReaderControlTest, StructNullParentAdvancesComplexChildShapeOnly) {
+    auto shape_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("shape", 1, 2), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 2, 0, 0, 2, 2}, std::vector<int16_t> {0, 0, 0, 0, 0, 0});
+
+    ParquetColumnSchema map_schema = nested_map_schema();
+    map_schema.nullable_definition_level = 2;
+    map_schema.definition_level = 3;
+    map_schema.repeated_ancestor_definition_level = 0;
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 3, 3, 1, 0),
+            make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {3, 3, 0, 0, 3, 3}, std::vector<int16_t> {0, 0, 0, 0, 0, 0});
+    auto value_reader =
+            make_scripted_scalar_reader(nested_int64_schema("value", 4, 4, 1, 0),
+                                        scalar_batch({4, 4, 0, 0, 4, 4}, {0, 0, 0, 0, 0, 0},
+                                                     {0, 1, -1, -1, 2, 3}, {10, 20, 30, 40}));
+    auto map_reader = std::make_unique<MapColumnReader>(
+            map_schema, map_schema.type, std::move(key_reader), std::move(value_reader));
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> children;
+    children.push_back(std::move(shape_child));
+    children.push_back(std::move(map_reader));
+    auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(DataTypes {map_schema.type},
+                                                                      Strings {"partitionValues"}));
+    StructColumnReader reader(nested_struct_schema(), struct_type, std::move(children), {-1, 0});
+    // NOTE(review): {-1, 0} presumably marks shape_child as not materialized in the output.
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(6, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 6);
+    // Rows 2 and 3 carry definition level 0 in every scripted child; they are the null parents.
+    const auto& nullable_struct = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_struct.size(), 6);
+    EXPECT_FALSE(nullable_struct.is_null_at(0));
+    EXPECT_FALSE(nullable_struct.is_null_at(1));
+    EXPECT_TRUE(nullable_struct.is_null_at(2));
+    EXPECT_TRUE(nullable_struct.is_null_at(3));
+    EXPECT_FALSE(nullable_struct.is_null_at(4));
+    EXPECT_FALSE(nullable_struct.is_null_at(5));
+
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_struct.get_nested_column());
+    const auto& map_nullable = assert_cast<const ColumnNullable&>(struct_column.get_column(0));
+    ASSERT_EQ(map_nullable.size(), 6);
+    EXPECT_FALSE(map_nullable.is_null_at(0));
+    EXPECT_FALSE(map_nullable.is_null_at(1));
+    EXPECT_TRUE(map_nullable.is_null_at(2));
+    EXPECT_TRUE(map_nullable.is_null_at(3));
+    EXPECT_FALSE(map_nullable.is_null_at(4));
+    EXPECT_FALSE(map_nullable.is_null_at(5));
+    const auto& map_column = assert_cast<const ColumnMap&>(map_nullable.get_nested_column());
+    ASSERT_EQ(map_column.get_offsets().size(), 6); // one entry per present row, none for rows 2-3
+    EXPECT_EQ(map_column.get_offsets()[0], 1);
+    EXPECT_EQ(map_column.get_offsets()[1], 2);
+    EXPECT_EQ(map_column.get_offsets()[2], 2);
+    EXPECT_EQ(map_column.get_offsets()[3], 2);
+    EXPECT_EQ(map_column.get_offsets()[4], 3);
+    EXPECT_EQ(map_column.get_offsets()[5], 4);
+}
+
+TEST(ParquetColumnReaderControlTest, StructNullParentAdvancesNestedStructDescendants) {
+    auto shape_child = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("shape", 1, 2), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 0, 2}, std::vector<int16_t> {0, 0, 0});
+
+    auto id_batch = scalar_batch({4, 3, 4}, {0, 0, 0}, {0, -1, 1}, {10, 20});
+    id_batch->value_slot_definition_level = 3;
+    auto id_reader =
+            make_scripted_scalar_reader(nested_int64_schema("id", 3, 4), std::move(id_batch));
+
+    ParquetColumnSchema inner_schema;
+    inner_schema.local_id = 0;
+    inner_schema.name = "stats_parsed";
+    inner_schema.kind = ParquetColumnSchemaKind::STRUCT;
+    inner_schema.nullable_definition_level = 2;
+    inner_schema.definition_level = 3;
+    inner_schema.type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {make_nullable(std::make_shared<DataTypeInt64>())}, Strings {"id"}));
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> inner_children;
+    inner_children.push_back(std::move(id_reader));
+    auto inner_reader = std::make_unique<StructColumnReader>(
+            inner_schema, inner_schema.type, std::move(inner_children), std::vector<int> {0});
+
+    std::vector<std::unique_ptr<ParquetColumnReader>> outer_children;
+    outer_children.push_back(std::move(shape_child));
+    outer_children.push_back(std::move(inner_reader));
+    auto outer_type = make_nullable(std::make_shared<DataTypeStruct>(DataTypes {inner_schema.type},
+                                                                     Strings {"stats_parsed"}));
+    StructColumnReader reader(nested_struct_schema(), outer_type, std::move(outer_children),
+                              {-1, 0});
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(3, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 3);
+    // Row 1 is scripted as the null outer struct (shape_child definition level 0).
+    const auto& outer_nullable = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(outer_nullable.size(), 3);
+    EXPECT_FALSE(outer_nullable.is_null_at(0));
+    EXPECT_TRUE(outer_nullable.is_null_at(1));
+    EXPECT_FALSE(outer_nullable.is_null_at(2));
+
+    const auto& outer_struct = assert_cast<const ColumnStruct&>(outer_nullable.get_nested_column());
+    const auto& inner_nullable = assert_cast<const ColumnNullable&>(outer_struct.get_column(0));
+    ASSERT_EQ(inner_nullable.size(), 3);
+    EXPECT_FALSE(inner_nullable.is_null_at(0));
+    EXPECT_TRUE(inner_nullable.is_null_at(1));
+    EXPECT_FALSE(inner_nullable.is_null_at(2));
+    const auto& inner_struct = assert_cast<const ColumnStruct&>(inner_nullable.get_nested_column());
+    const auto& id_nullable = assert_cast<const ColumnNullable&>(inner_struct.get_column(0));
+    const auto& id_values = assert_cast<const ColumnInt64&>(id_nullable.get_nested_column());
+    ASSERT_EQ(id_nullable.size(), 3); // stop here rather than index a short leaf column below
+    EXPECT_EQ(id_values.get_element(0), 10);
+    EXPECT_EQ(id_values.get_element(2), 20);
+}
+
+TEST(ParquetColumnReaderControlTest, ListKeepsEmptyBareRepeatedPrimitiveRows) {
+    auto element_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("element", 0, 1, 1, 1), std::make_shared<DataTypeInt64>(),
+            std::vector<int16_t> {0, 1, 1, 0}, std::vector<int16_t> {0, 0, 1, 0});
+    auto* element_reader_ptr = element_reader.get();
+    ListColumnReader reader(bare_repeated_int64_list_schema(),
+                            bare_repeated_int64_list_schema().type, std::move(element_reader));
+    // Scripted levels: row 0 empty, row 1 with two elements (rep 0 then 1), row 2 empty.
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(3, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 3);
+    // Expect offsets 0, 2, 2, with the element reader asked to build just the two elements.
+    const auto& array_column = assert_cast<const ColumnArray&>(*column);
+    ASSERT_EQ(array_column.get_offsets().size(), 3);
+    EXPECT_EQ(array_column.get_offsets()[0], 0);
+    EXPECT_EQ(array_column.get_offsets()[1], 2);
+    EXPECT_EQ(array_column.get_offsets()[2], 2);
+    EXPECT_EQ(element_reader_ptr->build_lengths(), std::vector<int64_t>({2}));
+}
+
+TEST(ParquetColumnReaderControlTest, NestedListSkipsAncestorEmptyRowsButKeepsNullElements) {
+    auto element_reader =
+            std::make_unique<ScriptedNestedReader>(nested_int64_schema("element", 5, 5, 2, 4),
+                                                   make_nullable(std::make_shared<DataTypeInt64>()),
+                                                   std::vector<int16_t> {1, 5, 5, 5, 2, 5, 2, 0},
+                                                   std::vector<int16_t> {0, 0, 2, 1, 0, 1, 1, 0});
+    auto* element_reader_ptr = element_reader.get();
+
+    const auto inner_type = make_nullable(
+            std::make_shared<DataTypeArray>(make_nullable(std::make_shared<DataTypeInt64>())));
+    auto inner_reader = std::make_unique<ListColumnReader>(
+            nested_list_schema("inner", make_nullable(std::make_shared<DataTypeInt64>()), 3, 4, 2,
+                               2),
+            inner_type, std::move(element_reader));
+    auto outer_type = make_nullable(std::make_shared<DataTypeArray>(inner_type));
+    ListColumnReader reader(nested_list_schema("outer", inner_type, 1, 2, 1, 2), outer_type,
+                            std::move(inner_reader));
+    // Repetition level 0 appears four times in the script, matching the four rows built below.
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(4, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 4);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 4);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+    EXPECT_FALSE(nullable_column.is_null_at(2));
+    EXPECT_TRUE(nullable_column.is_null_at(3)); // the last scripted row has definition level 0
+
+    const auto& outer_array = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_array.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 4);
+    EXPECT_EQ(outer_offsets[0], 0); // row 0: empty outer list
+    EXPECT_EQ(outer_offsets[1], 2);
+    EXPECT_EQ(outer_offsets[2], 5);
+    EXPECT_EQ(outer_offsets[3], 5);
+    // Five inner lists in total; entries 2 and 4 are expected to be null inner lists.
+    const auto& inner_nullable = assert_cast<const ColumnNullable&>(outer_array.get_data());
+    ASSERT_EQ(inner_nullable.size(), 5);
+    EXPECT_FALSE(inner_nullable.is_null_at(0));
+    EXPECT_FALSE(inner_nullable.is_null_at(1));
+    EXPECT_TRUE(inner_nullable.is_null_at(2));
+    EXPECT_FALSE(inner_nullable.is_null_at(3));
+    EXPECT_TRUE(inner_nullable.is_null_at(4));
+
+    const auto& inner_array = assert_cast<const ColumnArray&>(inner_nullable.get_nested_column());
+    const auto& inner_offsets = inner_array.get_offsets();
+    ASSERT_EQ(inner_offsets.size(), 5);
+    EXPECT_EQ(inner_offsets[0], 2);
+    EXPECT_EQ(inner_offsets[1], 3);
+    EXPECT_EQ(inner_offsets[2], 3);
+    EXPECT_EQ(inner_offsets[3], 4);
+    EXPECT_EQ(inner_offsets[4], 4);
+    EXPECT_EQ(element_reader_ptr->build_lengths(), std::vector<int64_t>({4}));
+}
+
+TEST(ParquetColumnReaderControlTest, MapKeepsEmptyMapRows) {
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1, 2),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {1},
+            std::vector<int16_t> {0});
+    auto value_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("value", 2, 3, 1, 2),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {1},
+            std::vector<int16_t> {0});
+    auto* value_reader_ptr = value_reader.get();
+    MapColumnReader reader(nested_map_schema(), nested_map_schema().type, std::move(key_reader),
+                           std::move(value_reader));
+
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(1, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 1);
+    // The single row is expected to be a present-but-empty map: not null, offset 0.
+    const auto& nullable_map = assert_cast<const ColumnNullable&>(*column);
+    EXPECT_FALSE(nullable_map.is_null_at(0));
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_map.get_nested_column());
+    ASSERT_EQ(map_column.get_offsets().size(), 1);
+    EXPECT_EQ(map_column.get_offsets()[0], 0);
+    EXPECT_EQ(value_reader_ptr->build_lengths(), std::vector<int64_t>({0})); // one zero-length call
+}
+
+TEST(ParquetColumnReaderControlTest, ListMapSkipsAncestorEmptyRowsBeforeScalarValues) {
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 4, 4, 2, 4),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {1, 4},
+            std::vector<int16_t> {0, 0});
+    auto value_reader = make_scripted_scalar_reader(nested_int64_schema("value", 5, 5, 2, 4),
+                                                    scalar_batch({1, 5}, {0, 0}, {-1, 0}, {100}));
+
+    const auto map_type = make_nullable(
+            std::make_shared<DataTypeMap>(make_nullable(std::make_shared<DataTypeInt64>()),
+                                          make_nullable(std::make_shared<DataTypeInt64>())));
+    auto map_reader = std::make_unique<MapColumnReader>(
+            nested_map_schema(make_nullable(std::make_shared<DataTypeInt64>())), map_type,
+            std::move(key_reader), std::move(value_reader));
+    auto outer_type = make_nullable(std::make_shared<DataTypeArray>(map_type));
+    ListColumnReader reader(nested_list_schema("outer", map_type, 1, 2, 1, 2), outer_type,
+                            std::move(map_reader));
+    // Two rows are scripted: the first yields an empty outer list, the second one map entry.
+    auto column = reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = reader.build_nested_column(2, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_EQ(rows_read, 2);
+
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*column);
+    ASSERT_EQ(nullable_column.size(), 2);
+    EXPECT_FALSE(nullable_column.is_null_at(0));
+    EXPECT_FALSE(nullable_column.is_null_at(1));
+
+    const auto& outer_array = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    const auto& outer_offsets = outer_array.get_offsets();
+    ASSERT_EQ(outer_offsets.size(), 2);
+    EXPECT_EQ(outer_offsets[0], 0);
+    EXPECT_EQ(outer_offsets[1], 1);
+
+    const auto& map_nullable = assert_cast<const ColumnNullable&>(outer_array.get_data());
+    ASSERT_EQ(map_nullable.size(), 1);
+    EXPECT_FALSE(map_nullable.is_null_at(0));
+    const auto& map_column = assert_cast<const ColumnMap&>(map_nullable.get_nested_column());
+    ASSERT_EQ(map_column.get_offsets().size(), 1);
+    EXPECT_EQ(map_column.get_offsets()[0], 1);
+    // The only scripted value (100) must land in the single map entry.
+    const auto& values = assert_cast<const ColumnNullable&>(map_column.get_values());
+    const auto& value_data = assert_cast<const ColumnInt64&>(values.get_nested_column());
+    ASSERT_EQ(values.size(), 1);
+    EXPECT_FALSE(values.is_null_at(0));
+    EXPECT_EQ(value_data.get_element(0), 100);
+}
+
+TEST(ParquetColumnReaderControlTest, MapRejectsNullKeysAndMisalignedScalarValueRepLevels) {
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2}, std::vector<int16_t> {0}, false, true); // build_nulls=true
+    auto value_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("value", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2}, std::vector<int16_t> {0});
+    MapColumnReader null_key_reader(nested_map_schema(), nested_map_schema().type,
+                                    std::move(key_reader), std::move(value_reader));
+    auto column = null_key_reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = null_key_reader.build_nested_column(1, column, &rows_read);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains null key"), std::string::npos);
+    // Second case: key repetition levels {0, 1} vs. value repetition levels {0, 0}.
+    auto aligned_key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 2}, std::vector<int16_t> {0, 1});
+    auto misaligned_value_reader =
+            make_scripted_scalar_reader(nested_int64_schema("value", 2, 3, 1),
+                                        scalar_batch({3, 3}, {0, 0}, {0, 1}, {100, 200}));
+    MapColumnReader misaligned_reader(nested_map_schema(), nested_map_schema().type,
+                                      std::move(aligned_key_reader),
+                                      std::move(misaligned_value_reader));
+    column = misaligned_reader.type()->create_column();
+    status = misaligned_reader.build_nested_column(1, column, &rows_read);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("value repetition level is not aligned"), std::string::npos);
+}
+
+TEST(ParquetColumnReaderControlTest, MapBuildsScalarAndComplexValuePaths) {
+    auto key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 2}, std::vector<int16_t> {0, 1});
+    auto scalar_value_reader =
+            make_scripted_scalar_reader(nested_int64_schema("value", 2, 3, 1),
+                                        scalar_batch({3, 3}, {0, 1}, {0, 1}, {100, 200}));
+    MapColumnReader scalar_reader(nested_map_schema(), nested_map_schema().type,
+                                  std::move(key_reader), std::move(scalar_value_reader));
+    auto column = scalar_reader.type()->create_column();
+    int64_t rows_read = 0;
+    auto status = scalar_reader.build_nested_column(1, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    const auto& nullable_map = assert_cast<const ColumnNullable&>(*column);
+    const auto& map_column = assert_cast<const ColumnMap&>(nullable_map.get_nested_column());
+    ASSERT_EQ(map_column.get_offsets().size(), 1);
+    EXPECT_EQ(map_column.get_offsets()[0], 2); // one row holding two entries
+    const auto& values = assert_cast<const ColumnNullable&>(map_column.get_values());
+    const auto& value_data = assert_cast<const ColumnInt64&>(values.get_nested_column());
+    ASSERT_EQ(values.size(), 2);
+    EXPECT_EQ(value_data.get_element(0), 100);
+    EXPECT_EQ(value_data.get_element(1), 200);
+    // Same key script, but the value side is a ScriptedNestedReader instead of a scalar reader.
+    auto complex_key_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("key", 1, 2, 1), make_nullable(std::make_shared<DataTypeInt64>()),
+            std::vector<int16_t> {2, 2}, std::vector<int16_t> {0, 1});
+    auto complex_value_reader = std::make_unique<ScriptedNestedReader>(
+            nested_int64_schema("complex_value", 2, 3, 1),
+            make_nullable(std::make_shared<DataTypeInt64>()), std::vector<int16_t> {3, 3},
+            std::vector<int16_t> {0, 1});
+    auto* complex_value_reader_ptr = complex_value_reader.get();
+    MapColumnReader complex_reader(nested_map_schema(), nested_map_schema().type,
+                                   std::move(complex_key_reader), std::move(complex_value_reader));
+    column = complex_reader.type()->create_column();
+    status = complex_reader.build_nested_column(1, column, &rows_read);
+    ASSERT_TRUE(status.ok()) << status;
+    EXPECT_EQ(complex_value_reader_ptr->build_lengths(), std::vector<int64_t>({2}));
+}
+
+TEST(ParquetVirtualColumnReaderTest, RowPositionReadSkipAndInvalidArgs) {
+    RowPositionColumnReader position_reader(100);
+    EXPECT_EQ(position_reader.file_column_id(), format::ROW_POSITION_COLUMN_ID);
+    EXPECT_EQ(position_reader.parquet_leaf_column_id(), -1);
+    EXPECT_EQ(position_reader.name(), format::ROW_POSITION_COLUMN_NAME);
+    // Read two rows, skip three, then read two more into the same column.
+    auto output = position_reader.type()->create_column();
+    int64_t produced = 0;
+    ASSERT_TRUE(position_reader.read(2, output, &produced).ok());
+    ASSERT_EQ(produced, 2);
+    ASSERT_TRUE(position_reader.skip(3).ok());
+    ASSERT_TRUE(position_reader.read(2, output, &produced).ok());
+    // Expected positions: 100 and 101 before the skip, 105 and 106 after it.
+    const auto& positions = assert_cast<const ColumnInt64&>(*output);
+    ASSERT_EQ(positions.size(), 4);
+    EXPECT_EQ(positions.get_element(0), 100);
+    EXPECT_EQ(positions.get_element(1), 101);
+    EXPECT_EQ(positions.get_element(2), 105);
+    EXPECT_EQ(positions.get_element(3), 106);
+    // Invalid arguments: null column, negative row count, null rows_read pointer.
+    MutableColumnPtr missing_column;
+    EXPECT_FALSE(position_reader.read(1, missing_column, &produced).ok());
+    EXPECT_FALSE(position_reader.read(-1, output, &produced).ok());
+    EXPECT_FALSE(position_reader.read(1, output, nullptr).ok());
+}
+
+TEST(ParquetVirtualColumnReaderTest, GlobalRowIdReadSkipSelectAndInvalidArgs) {
+    format::GlobalRowIdContext ctx {.version = 7, .backend_id = 123456789, .file_id = 42};
+    GlobalRowIdColumnReader rowid_reader(ctx, 10);
+    EXPECT_EQ(rowid_reader.file_column_id(), format::GLOBAL_ROWID_COLUMN_ID);
+    EXPECT_EQ(rowid_reader.parquet_leaf_column_id(), -1);
+    EXPECT_EQ(rowid_reader.name(), BeConsts::GLOBAL_ROWID_COL);
+    // Read two ids, skip two, then read one more into the same column.
+    auto output = rowid_reader.type()->create_column();
+    int64_t produced = 0;
+    ASSERT_TRUE(rowid_reader.read(2, output, &produced).ok());
+    ASSERT_TRUE(rowid_reader.skip(2).ok());
+    ASSERT_TRUE(rowid_reader.read(1, output, &produced).ok());
+    // Decoded ids must echo the context and carry row ids 10, 11 and (after the skip) 14.
+    const auto& encoded = assert_cast<const ColumnString&>(*output);
+    ASSERT_EQ(encoded.size(), 3);
+    const auto head = decode_rowid(encoded, 0);
+    EXPECT_EQ(head.version, ctx.version);
+    EXPECT_EQ(head.backend_id, ctx.backend_id);
+    EXPECT_EQ(head.file_id, ctx.file_id);
+    EXPECT_EQ(head.row_id, 10);
+    EXPECT_EQ(decode_rowid(encoded, 1).row_id, 11);
+    EXPECT_EQ(decode_rowid(encoded, 2).row_id, 14);
+    // select() on a fresh reader starting at 20: indexes 1 and 3 must yield 21 and 23.
+    GlobalRowIdColumnReader picking_reader(ctx, 20);
+    SelectionVector picks(2);
+    picks.set_index(0, 1);
+    picks.set_index(1, 3);
+    auto picked_column = picking_reader.type()->create_column();
+    ASSERT_TRUE(picking_reader.select(picks, 2, 5, picked_column).ok());
+    const auto& picked = assert_cast<const ColumnString&>(*picked_column);
+    ASSERT_EQ(picked.size(), 2);
+    EXPECT_EQ(decode_rowid(picked, 0).row_id, 21);
+    EXPECT_EQ(decode_rowid(picked, 1).row_id, 23);
+    // Invalid arguments: null column, negative row count, null rows_read pointer.
+    MutableColumnPtr missing_column;
+    EXPECT_FALSE(rowid_reader.read(1, missing_column, &produced).ok());
+    EXPECT_FALSE(rowid_reader.read(-1, output, &produced).ok());
+    EXPECT_FALSE(rowid_reader.read(1, output, nullptr).ok());
+}
+
+TEST(ParquetColumnReaderFactoryTest, RejectsInvalidLeafIdBeforeCreatingRecordReader) {
+    ParquetColumnSchema bad_schema = int64_schema("bad_leaf");
+    bad_schema.kind = ParquetColumnSchemaKind::PRIMITIVE;
+    bad_schema.leaf_column_id = 3;
+    bad_schema.type_descriptor.physical_type = ::parquet::Type::INT64;
+    bad_schema.type_descriptor.doris_type = bad_schema.type;
+    // create() is expected to reject leaf id 3 for a factory built from (nullptr, 1).
+    ParquetColumnReaderFactory factory(nullptr, 1);
+    std::unique_ptr<ParquetColumnReader> created;
+    const auto status = factory.create(bad_schema, &created);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Invalid parquet leaf column id"), std::string::npos);
+}
+
+TEST(ParquetColumnReaderFactoryTest, RejectsStructInvalidAndEmptyProjection) {
+    auto projected = struct_schema_for_projection();
+    ParquetColumnReaderFactory factory(nullptr, 0);
+    std::unique_ptr<ParquetColumnReader> created;
+    // A partial projection naming child 9 must be reported as an invalid child.
+    auto bad_child = format::LocalColumnIndex::partial_local(0);
+    bad_child.children.push_back(format::LocalColumnIndex::local(9));
+    auto status = factory.create(projected, &bad_child, &created);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("invalid child"), std::string::npos);
+    // A partial projection with no children at all must be rejected as well.
+    auto no_children = format::LocalColumnIndex::partial_local(0);
+    status = factory.create(projected, &no_children, &created);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no children"), std::string::npos);
+}
+
+TEST(ParquetColumnReaderFactoryTest, RejectsListProjectionWithoutElement) {
+    auto projected = list_schema_for_projection();
+    ParquetColumnReaderFactory factory(nullptr, 0);
+    std::unique_ptr<ParquetColumnReader> created;
+    // A partial projection with no children must fail with a "contains no element" error.
+    auto childless = format::LocalColumnIndex::partial_local(0);
+    const auto status = factory.create(projected, &childless, &created);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no element"), std::string::npos);
+}
+
+TEST(ParquetColumnReaderFactoryTest, RejectsMapInvalidAndKeyOnlyProjection) {
+    auto projected = map_schema_for_projection();
+    ParquetColumnReaderFactory factory(nullptr, 0);
+    std::unique_ptr<ParquetColumnReader> created;
+    // Child 1 alongside child 9: the projection is expected to fail as an invalid child.
+    auto bad_child = format::LocalColumnIndex::partial_local(0);
+    bad_child.children.push_back(format::LocalColumnIndex::local(1));
+    bad_child.children.push_back(format::LocalColumnIndex::local(9));
+    auto status = factory.create(projected, &bad_child, &created);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("invalid child"), std::string::npos);
+    // Projecting child 0 alone must fail with a "contains no value" error.
+    auto key_only = format::LocalColumnIndex::partial_local(0);
+    key_only.children.push_back(format::LocalColumnIndex::local(0));
+    status = factory.create(projected, &key_only, &created);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("contains no value"), std::string::npos);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_reader_test.cpp b/be/test/format_v2/parquet/parquet_reader_test.cpp
new file mode 100644
index 00000000000000..ec71ebc614d633
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_reader_test.cpp
@@ -0,0 +1,2274 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_reader.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+#include <parquet/page_index.h>
+
+#include <cstring>
+#include <filesystem>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/primitive_type.h"
+#include "core/field.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "exprs/vslot_ref.h"
+#include "format_v2/column_mapper.h"
+#include "format_v2/expr/delete_predicate.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_scan.h"
+#include "format_v2/parquet/reader/column_reader.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Types_types.h"
+#include "io/io_common.h"
+#include "runtime/runtime_state.h"
+#include "storage/predicate/predicate_creator.h"
+#include "storage/segment/condition_cache.h"
+#include "storage/utils.h"
+
+namespace doris {
+namespace {
+
+constexpr int64_t ROW_COUNT = 5;
+
+format::LocalColumnIndex field_projection(int32_t column_id) {
+    return {.index = column_id}; // only .index is given; other members keep their defaults
+}
+
+// Strips every ColumnNullable wrapper at `position`, expecting no null rows and >= 1 wrapper.
+template <typename ColumnType>
+const ColumnType& nullable_nested_column(const Block& block, size_t position) {
+    const IColumn* current = block.get_by_position(position).column.get();
+    int depth = 0;
+    while (const auto* wrapper = check_and_get_column<ColumnNullable>(*current)) {
+        const auto& nulls = wrapper->get_null_map_data();
+        for (size_t i = 0; i < nulls.size(); ++i) {
+            EXPECT_EQ(nulls[i], 0) << "Unexpected null at row " << i << ", column position "
+                                   << position << ", nullable depth " << depth;
+        }
+        current = &wrapper->get_nested_column();
+        ++depth;
+    }
+    EXPECT_GT(depth, 0) << "Expected a nullable file-local column at position " << position;
+    return assert_cast<const ColumnType&>(*current);
+}
+
+class Int32GreaterThanExpr final : public VExpr {
+public:
+    Int32GreaterThanExpr(int column_id, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _column_id(column_id),
+              _value(value) {}
+    // Writes `count` UInt8 flags: 1 where the (optionally selected) input row exceeds _value.
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        const auto& source = nullable_nested_column<ColumnInt32>(*block, _column_id);
+        auto flags = ColumnUInt8::create();
+        auto& flag_data = flags->get_data();
+        flag_data.resize(count);
+        for (size_t out = 0; out < count; ++out) {
+            const size_t src = selector != nullptr ? (*selector)[out] : out;
+            flag_data[out] = source.get_element(src) > _value;
+        }
+        result_column = std::move(flags);
+        return Status::OK();
+    }
+    // Fixed display name of this test expression.
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    const int _column_id;
+    const int32_t _value;
+    const std::string _expr_name = "Int32GreaterThanExpr";
+};
+
+// Test-only predicate: (left int32 column + right int32 column) > constant.
+class Int32SumGreaterThanExpr final : public VExpr {
+public:
+    Int32SumGreaterThanExpr(int left_column_id, int right_column_id, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _left_column_id(left_column_id),
+              _right_column_id(right_column_id),
+              _value(value) {}
+    // Writes `count` UInt8 flags; a non-null selector maps each output row to its input row.
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        const auto& lhs = nullable_nested_column<ColumnInt32>(*block, _left_column_id);
+        const auto& rhs = nullable_nested_column<ColumnInt32>(*block, _right_column_id);
+        auto flags = ColumnUInt8::create();
+        auto& flag_data = flags->get_data();
+        flag_data.resize(count);
+        for (size_t out = 0; out < count; ++out) {
+            const size_t src = selector != nullptr ? (*selector)[out] : out;
+            flag_data[out] = lhs.get_element(src) + rhs.get_element(src) > _value;
+        }
+        result_column = std::move(flags);
+        return Status::OK();
+    }
+    // Fixed display name of this test expression.
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    const int _left_column_id;
+    const int _right_column_id;
+    const int32_t _value;
+    const std::string _expr_name = "Int32SumGreaterThanExpr";
+};
+
+class StringInExpr final : public VExpr {
+public:
+    StringInExpr(int column_id, std::vector<std::string> values)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _column_id(column_id),
+              _values(std::move(values)) {}
+    // Writes `count` UInt8 flags: 1 where the (optionally selected) string is one of _values.
+    Status execute_column_impl(VExprContext* context, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        const auto& source = nullable_nested_column<ColumnString>(*block, _column_id);
+        auto flags = ColumnUInt8::create();
+        auto& flag_data = flags->get_data();
+        flag_data.resize(count);
+        for (size_t out = 0; out < count; ++out) {
+            const size_t src = selector != nullptr ? (*selector)[out] : out;
+            const auto candidate = source.get_data_at(src).to_string();
+            flag_data[out] = _values.end() != std::find(_values.begin(), _values.end(), candidate);
+        }
+        result_column = std::move(flags);
+        return Status::OK();
+    }
+    // Fixed display name of this test expression.
+    const std::string& expr_name() const override { return _expr_name; }
+
+private:
+    const int _column_id;
+    const std::vector<std::string> _values;
+    const std::string _expr_name = "StringInExpr";
+};
+
+VExprContextSPtr create_int32_greater_than_conjunct(int column_id, int32_t value) {
+    auto predicate = std::make_shared<Int32GreaterThanExpr>(column_id, value);
+    auto conjunct = VExprContext::create_shared(std::move(predicate));
+    conjunct->_prepared = true; // flag set by hand; this helper never calls prepare()
+    conjunct->_opened = true;   // flag set by hand; this helper never calls open()
+    return conjunct;
+}
+
+VExprContextSPtr create_int32_sum_greater_than_conjunct(int left_column_id, int right_column_id,
+                                                        int32_t value) {
+    auto expr = std::make_shared<Int32SumGreaterThanExpr>(left_column_id, right_column_id, value);
+    auto conjunct = VExprContext::create_shared(std::move(expr));
+    conjunct->_prepared = true; // flag set by hand; this helper never calls prepare()
+    conjunct->_opened = true;   // flag set by hand; this helper never calls open()
+    return conjunct;
+}
+
+VExprContextSPtr create_string_in_conjunct(int column_id, std::vector<std::string> values) {
+    auto predicate = std::make_shared<StringInExpr>(column_id, std::move(values));
+    auto conjunct = VExprContext::create_shared(std::move(predicate));
+    conjunct->_prepared = true; // flag set by hand; this helper never calls prepare()
+    conjunct->_opened = true;   // flag set by hand; this helper never calls open()
+    return conjunct;
+}
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> finished; // output slot handed to Finish()
+    EXPECT_TRUE(builder->Finish(&finished).ok());
+    return finished;
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder ints; // one checked Append per element, in input order
+    for (size_t i = 0; i < values.size(); ++i) {
+        EXPECT_TRUE(ints.Append(values[i]).ok());
+    }
+    return finish_array(&ints);
+}
+
+std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder strings; // one checked Append per element, in input order
+    for (size_t i = 0; i < values.size(); ++i) {
+        EXPECT_TRUE(strings.Append(values[i]).ok());
+    }
+    return finish_array(&strings);
+}
+
+std::shared_ptr<arrow::Array> build_timestamp_array(const std::shared_ptr<arrow::DataType>& type,
+                                                    const std::vector<int64_t>& values) {
+    arrow::TimestampBuilder stamps(type, arrow::default_memory_pool()); // raw int64 values
+    for (size_t i = 0; i < values.size(); ++i) {
+        EXPECT_TRUE(stamps.Append(values[i]).ok());
+    }
+    return finish_array(&stamps);
+}
+
+// Builds a struct<id: int32, name: utf8> array (both non-nullable), one row per `ids` entry.
+// `names` is indexed with the same row number, so it must be at least as long as `ids`.
+std::shared_ptr<arrow::Array> build_struct_array(const std::vector<int32_t>& ids,
+                                                 const std::vector<std::string>& names) {
+    auto row_type = arrow::struct_({arrow::field("id", arrow::int32(), false),
+                                    arrow::field("name", arrow::utf8(), false)});
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> children;
+    children.push_back(std::make_shared<arrow::Int32Builder>());
+    children.push_back(std::make_shared<arrow::StringBuilder>());
+    arrow::StructBuilder rows(row_type, arrow::default_memory_pool(), std::move(children));
+    // The child builders were handed to StructBuilder; fetch typed pointers back to fill them.
+    auto* id_values = assert_cast<arrow::Int32Builder*>(rows.field_builder(0));
+    auto* name_values = assert_cast<arrow::StringBuilder*>(rows.field_builder(1));
+    for (size_t i = 0; i < ids.size(); ++i) {
+        EXPECT_TRUE(rows.Append().ok());
+        EXPECT_TRUE(id_values->Append(ids[i]).ok());
+        EXPECT_TRUE(name_values->Append(names[i]).ok());
+    }
+    return finish_array(&rows);
+}
+
+void write_parquet_file(const std::string& file_path, int64_t row_group_size = ROW_COUNT) {
+    auto file_schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto rows = arrow::Table::Make(file_schema,
+                                   {build_int32_array({1, 2, 3, 4, 5}),
+                                    build_string_array({"one", "two", "three", "four", "five"})});
+    // Open the destination; a failed open leaves this helper early through ASSERT_TRUE.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // Parquet 2.6 format, V2 data pages, no compression.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*rows, arrow::default_memory_pool(), sink,
+                                                      row_group_size, props.build()));
+}
+
+std::shared_ptr<arrow::Array> build_nullable_int_string_map_array() {
+    auto keys = std::make_shared<arrow::Int32Builder>();
+    auto items = std::make_shared<arrow::StringBuilder>();
+    auto map_type = arrow::map(arrow::int32(), arrow::field("value", arrow::utf8(), true));
+    arrow::MapBuilder maps(arrow::default_memory_pool(), keys, items, map_type);
+    // Row 0: one entry, 10 -> "small".
+    EXPECT_TRUE(maps.Append().ok());
+    EXPECT_TRUE(keys->Append(10).ok());
+    EXPECT_TRUE(items->Append("small").ok());
+    // Row 1 comes from AppendNull(), row 2 from AppendEmptyValue().
+    EXPECT_TRUE(maps.AppendNull().ok());
+    EXPECT_TRUE(maps.AppendEmptyValue().ok());
+    // Row 3: one entry, 20 -> a 4096-character string of 'x'.
+    EXPECT_TRUE(maps.Append().ok());
+    EXPECT_TRUE(keys->Append(20).ok());
+    EXPECT_TRUE(items->Append(std::string(4096, 'x')).ok());
+    // Row 4: one entry, 30 -> null value.
+    EXPECT_TRUE(maps.Append().ok());
+    EXPECT_TRUE(keys->Append(30).ok());
+    EXPECT_TRUE(items->AppendNull().ok());
+    return finish_array(&maps);
+}
+
+std::shared_ptr<arrow::Array> build_nullable_string_list_array() {
+    auto elements = std::make_shared<arrow::StringBuilder>();
+    arrow::ListBuilder lists(arrow::default_memory_pool(), elements,
+                             arrow::list(arrow::field("element", arrow::utf8(), true)));
+    // Row 0: ["small", 4096 x 'a'].
+    EXPECT_TRUE(lists.Append().ok());
+    EXPECT_TRUE(elements->Append("small").ok());
+    EXPECT_TRUE(elements->Append(std::string(4096, 'a')).ok());
+    // Row 1 comes from AppendNull(), row 2 from AppendEmptyValue().
+    EXPECT_TRUE(lists.AppendNull().ok());
+    EXPECT_TRUE(lists.AppendEmptyValue().ok());
+    // Row 3: a list holding a single null element.
+    EXPECT_TRUE(lists.Append().ok());
+    EXPECT_TRUE(elements->AppendNull().ok());
+    // Row 4: [4096 x 'b'].
+    EXPECT_TRUE(lists.Append().ok());
+    EXPECT_TRUE(elements->Append(std::string(4096, 'b')).ok());
+    return finish_array(&lists);
+}
+
+// Builds five struct<payload: nullable utf8, id: int32> rows; one row is appended as null and
+// one has a null payload.
+std::shared_ptr<arrow::Array> build_nullable_string_struct_array() {
+    auto row_type = arrow::struct_({arrow::field("payload", arrow::utf8(), true),
+                                    arrow::field("id", arrow::int32(), false)});
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> children;
+    children.push_back(std::make_shared<arrow::StringBuilder>());
+    children.push_back(std::make_shared<arrow::Int32Builder>());
+    arrow::StructBuilder rows(row_type, arrow::default_memory_pool(), std::move(children));
+    // The child builders were handed to StructBuilder; fetch typed pointers back to fill them.
+    auto* payloads = assert_cast<arrow::StringBuilder*>(rows.field_builder(0));
+    auto* ids = assert_cast<arrow::Int32Builder*>(rows.field_builder(1));
+    // Row 0: {"small", 1}.
+    EXPECT_TRUE(rows.Append().ok());
+    EXPECT_TRUE(payloads->Append("small").ok());
+    EXPECT_TRUE(ids->Append(1).ok());
+    // Row 1: appended through AppendNull() on the struct builder itself.
+    EXPECT_TRUE(rows.AppendNull().ok());
+    // Row 2: {4096 x 'c', 2}.
+    EXPECT_TRUE(rows.Append().ok());
+    EXPECT_TRUE(payloads->Append(std::string(4096, 'c')).ok());
+    EXPECT_TRUE(ids->Append(2).ok());
+    // Row 3: {null payload, 3}.
+    EXPECT_TRUE(rows.Append().ok());
+    EXPECT_TRUE(payloads->AppendNull().ok());
+    EXPECT_TRUE(ids->Append(3).ok());
+    // Row 4: {4096 x 'd', 4}.
+    EXPECT_TRUE(rows.Append().ok());
+    EXPECT_TRUE(payloads->Append(std::string(4096, 'd')).ok());
+    EXPECT_TRUE(ids->Append(4).ok());
+    return finish_array(&rows);
+}
+
+void write_nullable_map_parquet_file(const std::string& file_path) {
+    auto maps = build_nullable_int_string_map_array();
+    auto column_field = arrow::field("arr", maps->type(), true);
+    auto table = arrow::Table::Make(arrow::schema({column_field}), {maps});
+    // Single nullable column "arr"; a failed open leaves the helper early via ASSERT_TRUE.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // Parquet 2.6, V2 data pages, uncompressed; ROW_COUNT is passed as the row-group size.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), sink,
+                                                      ROW_COUNT, props.build()));
+}
+
+void write_nullable_string_list_parquet_file(const std::string& file_path) {
+    auto lists = build_nullable_string_list_array();
+    auto column_field = arrow::field("arr", lists->type(), true);
+    auto table = arrow::Table::Make(arrow::schema({column_field}), {lists});
+    // Single nullable column "arr"; a failed open leaves the helper early via ASSERT_TRUE.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // Parquet 2.6, V2 data pages, uncompressed; ROW_COUNT is passed as the row-group size.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), sink,
+                                                      ROW_COUNT, props.build()));
+}
+
+void write_nullable_string_struct_parquet_file(const std::string& file_path) {
+    auto structs = build_nullable_string_struct_array();
+    auto column_field = arrow::field("s", structs->type(), true);
+    auto table = arrow::Table::Make(arrow::schema({column_field}), {structs});
+    // Single nullable column "s"; a failed open leaves the helper early via ASSERT_TRUE.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // Parquet 2.6, V2 data pages, uncompressed; ROW_COUNT is passed as the row-group size.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), sink,
+                                                      ROW_COUNT, props.build()));
+}
+
+void write_int96_timestamp_parquet_file(const std::string& file_path) {
+    auto ts_field = arrow::field("ts_tz", arrow::timestamp(arrow::TimeUnit::MICRO), true);
+    auto stamps =
+            build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO),
+                                  {1735660800000000LL, 1735660800123456LL, 1735689600000000LL});
+    auto table = arrow::Table::Make(arrow::schema({ts_field}), {stamps});
+    // Three microsecond timestamps in one nullable column; a failed open leaves the helper early.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // Same writer settings as the other helpers, plus forced INT96 timestamp output.
+    ::parquet::WriterProperties::Builder writer_props;
+    writer_props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    writer_props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    writer_props.compression(::parquet::Compression::UNCOMPRESSED);
+    ::parquet::ArrowWriterProperties::Builder arrow_props;
+    arrow_props.enable_force_write_int96_timestamps();
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), sink,
+                                                      ROW_COUNT, writer_props.build(),
+                                                      arrow_props.build()));
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, int64_t row_group_size = ROW_COUNT) {
+    auto file_schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("score", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto rows = arrow::Table::Make(
+            file_schema, {build_int32_array({1, 2, 3, 4, 5}), build_int32_array({1, 2, 3, 4, 5}),
+                          build_string_array({"one", "two", "three", "four", "five"})});
+    // "id" and "score" carry the same values; a failed open leaves the helper early.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // Parquet 2.6 format, V2 data pages, no compression.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*rows, arrow::default_memory_pool(), sink,
+                                                      row_group_size, props.build()));
+}
+
+void write_condition_cache_parquet_file(const std::string& file_path) {
+    constexpr int64_t total_rows = ConditionCacheContext::GRANULE_SIZE * 2;
+    std::vector<int32_t> sequence(total_rows);
+    std::iota(sequence.begin(), sequence.end(), 0);
+    // One non-nullable int32 column "id" holding 0 .. total_rows - 1.
+    auto schema = arrow::schema({arrow::field("id", arrow::int32(), false)});
+    auto table = arrow::Table::Make(schema, {build_int32_array(sequence)});
+    // A failed open leaves the helper early through ASSERT_TRUE.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // total_rows is also passed as the row-group size.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), sink,
+                                                      total_rows, props.build()));
+}
+
+void write_struct_filter_parquet_file(const std::string& file_path) {
+    auto id_child = arrow::field("id", arrow::int32(), false);
+    auto name_child = arrow::field("name", arrow::utf8(), false);
+    auto row_type = arrow::struct_({id_child, name_child});
+    auto schema = arrow::schema({
+            arrow::field("s", row_type, false),
+    });
+    auto table = arrow::Table::Make(
+            schema, {build_struct_array({1, 2, 10, 11}, {"one", "two", "ten", "eleven"})});
+    // Four struct rows in column "s"; a failed open leaves the helper early via ASSERT_TRUE.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // The row-group size passed to WriteTable is 2.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), sink, 2,
+                                                      props.build()));
+}
+
+void write_dictionary_filter_parquet_file(const std::string& file_path) {
+    auto file_schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto rows =
+            arrow::Table::Make(file_schema, {build_int32_array({1, 2, 3, 4, 5, 6}),
+                                             build_string_array({"aa", "az", "lm", "lz", "za", "zz"})});
+    // Six rows; a failed open leaves the helper early through ASSERT_TRUE.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *opened;
+    // Dictionary on "value" only, statistics off, row-group size 1.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.enable_dictionary("value");
+    props.disable_dictionary("id");
+    props.disable_statistics();
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*rows, arrow::default_memory_pool(), sink, 1,
+                                                      props.build()));
+}
+
+// Writes six struct rows s{id, name} with row-group size 1 and dictionary encoding on s.name.
+void write_nested_dictionary_filter_parquet_file(const std::string& file_path) {
+    auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false),
+                                       arrow::field("name", arrow::utf8(), false)});
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(
+            schema, {build_struct_array({1, 2, 3, 4, 5, 6}, {"aa", "az", "lm", "lz", "za", "zz"})});
+    // A failed open leaves the helper early through ASSERT_TRUE.
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+    // Dictionary settings are keyed by dotted column path: the struct leaves are s.name and s.id.
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.enable_dictionary("s.name");
+    builder.disable_dictionary("s.id");
+    builder.disable_statistics();
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      builder.build()));
+}
+
+void write_dictionary_edge_parquet_file(const std::string& file_path) {
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto table = arrow::Table::Make(
+            schema,
+            {build_int32_array({1, 2, 3, 4, 5, 6, 7, 8}),
+             build_string_array({"", "same", "other", "long-value", "", "tail", "same", "last"})});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.enable_dictionary("value");
+    builder.disable_dictionary("id");
+    builder.disable_statistics();
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 2,
+                                                      builder.build()));
+}
+
+void write_nested_page_index_filter_parquet_file(const std::string& file_path) {
+    std::vector<int32_t> ids(128);
+    std::iota(ids.begin(), ids.end(), 0);
+    std::vector<std::string> names;
+    names.reserve(ids.size());
+    for (const auto id : ids) {
+        names.push_back("name-" + std::to_string(id));
+    }
+    auto id_field = arrow::field("id", arrow::int32(), false);
+    auto name_field = arrow::field("name", arrow::utf8(), false);
+    auto struct_type = arrow::struct_({id_field, name_field});
+    auto schema = arrow::schema({
+            arrow::field("s", struct_type, false),
+    });
+    auto table = arrow::Table::Make(schema, {build_struct_array(ids, names)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.disable_dictionary();
+    builder.enable_write_page_index();
+    builder.write_batch_size(8);
+    builder.data_pagesize(10);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      ids.size(), builder.build()));
+}
+
+void write_page_index_filter_parquet_file(const std::string& file_path) {
+    std::vector<int32_t> ids(128);
+    std::iota(ids.begin(), ids.end(), 0);
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.disable_dictionary();
+    builder.enable_write_page_index();
+    builder.write_batch_size(8);
+    builder.data_pagesize(10);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      ids.size(), builder.build()));
+}
+
+void write_page_index_filter_pair_parquet_file(const std::string& file_path) {
+    std::vector<int32_t> ids(128);
+    std::iota(ids.begin(), ids.end(), 0);
+    std::vector<int32_t> payloads;
+    payloads.reserve(ids.size());
+    for (const auto id : ids) {
+        payloads.push_back(id + 1000);
+    }
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("payload", arrow::int32(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(payloads)});
+
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    builder.disable_dictionary();
+    builder.enable_write_page_index();
+    builder.write_batch_size(8);
+    builder.data_pagesize(10);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      ids.size(), builder.build()));
+}
+
+Block build_file_block(const std::vector<format::ColumnDefinition>& schema) {
+    Block result;  // receives one newly created column per schema entry
+    for (const auto& column_def : schema) {
+        result.insert({column_def.type->create_column(), column_def.type, column_def.name});
+    }
+    return result;
+}
+
+Block build_file_block_with_row_position(const std::vector<format::ColumnDefinition>& schema) {
+    Block result = build_file_block(schema);
+    // Also insert the column described by format::row_position_column_definition().
+    const auto row_pos = format::row_position_column_definition();
+    result.insert({row_pos.type->create_column(), row_pos.type, row_pos.name});
+    return result;
+}
+
+void use_schema_order_positions(format::FileScanRequest* request,
+                                const std::vector<format::ColumnDefinition>& schema) {
+    DORIS_CHECK(request != nullptr);
+    for (size_t pos = 0; const auto& column_def : schema) {  // pos == index within `schema`
+        request->local_positions.emplace(format::LocalColumnId(column_def.local_id),
+                                         format::LocalIndex(pos++));
+    }
+}
+
+int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    const bool has_dict = column_metadata.has_dictionary_page();  // else: data page offset
+    return static_cast<int64_t>(has_dict ? column_metadata.dictionary_page_offset()
+                                         : column_metadata.data_page_offset());
+}
+
+std::pair<int64_t, int64_t> row_group_mid_range(const std::string& file_path, int row_group_idx) {
+    auto file_reader = ::parquet::ParquetFileReader::OpenFile(file_path, false);
+    auto file_metadata = file_reader->metadata();
+    auto row_group = file_metadata->RowGroup(row_group_idx);
+    auto head_chunk = row_group->ColumnChunk(0);
+    auto tail_chunk = row_group->ColumnChunk(row_group->num_columns() - 1);
+    // Row group byte span: start of the first chunk up to the end of the last chunk.
+    const int64_t begin_offset = parquet_column_start_offset(*head_chunk);
+    const int64_t end_offset =
+            parquet_column_start_offset(*tail_chunk) + tail_chunk->total_compressed_size();
+    const int64_t half_span = (end_offset - begin_offset) / 2;
+    return {begin_offset + half_span, 1};  // presumably {range_start_offset, range_size}
+}
+
+GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) {
+    const auto ref = column.get_data_at(row);
+    EXPECT_EQ(ref.size, sizeof(GlobalRowLoacationV2));  // non-fatal: the copy below still runs
+    GlobalRowLoacationV2 location(0, 0, 0, 0);
+    std::memcpy(&location, ref.data, sizeof(GlobalRowLoacationV2));  // cell bytes -> struct
+    return location;
+}
+
+class TestFileReader final : public format::FileReader {  // test double over the base reader
+public:
+    TestFileReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                   std::unique_ptr<io::FileDescription>& file_description,
+                   std::shared_ptr<io::IOContext> io_ctx)
+            : format::FileReader(system_properties, file_description, io_ctx, nullptr) {}
+
+    Status get_schema(std::vector<format::ColumnDefinition>* file_schema) const override {
+        file_schema->clear();  // the output always ends up with exactly one column
+        format::ColumnDefinition field;
+        field.identifier = Field::create_field<TYPE_INT>(0);
+        field.name = "id";
+        field.type = std::make_shared<DataTypeInt32>();
+        file_schema->push_back(std::move(field));
+        return Status::OK();
+    }
+
+    bool has_request() const { return _request != nullptr; }  // inherited request pointer is set
+
+    bool eof() const { return _eof; }  // exposes the inherited end-of-file flag
+
+    bool has_io_context() const { return _io_ctx != nullptr; }  // inherited IO context is held
+
+    long io_context_use_count() const { return _io_ctx.use_count(); }  // owners of the context
+};
+
+TEST(FileReaderTest, OpenStoresRequestAndCloseKeepsRequest) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto file_description = std::make_unique<io::FileDescription>();
+    auto io_ctx = std::make_shared<io::IOContext>();
+    TestFileReader reader(system_properties, file_description, io_ctx);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns.push_back(field_projection(0));
+    ASSERT_TRUE(reader.open(request).ok());
+    EXPECT_NE(request, nullptr);
+    EXPECT_TRUE(reader.has_request());
+
+    ASSERT_TRUE(reader.close().ok());
+    EXPECT_TRUE(reader.has_request());
+    EXPECT_TRUE(reader.eof());
+}
+
+TEST(FileReaderTest, CloseReleasesSharedIOContext) {
+    auto system_properties = std::make_shared<io::FileSystemProperties>();
+    system_properties->system_type = TFileType::FILE_LOCAL;
+    auto file_description = std::make_unique<io::FileDescription>();
+    auto io_ctx = std::make_shared<io::IOContext>();
+    std::weak_ptr<io::IOContext> weak_io_ctx = io_ctx;  // observes lifetime without owning
+    TestFileReader reader(system_properties, file_description, io_ctx);
+
+    EXPECT_TRUE(reader.has_io_context());
+    EXPECT_EQ(reader.io_context_use_count(), 2);  // owners: this test and the reader
+    io_ctx.reset();                               // drop the test's own reference
+    EXPECT_FALSE(weak_io_ctx.expired());
+    EXPECT_EQ(reader.io_context_use_count(), 1);  // the reader is now the only owner
+
+    ASSERT_TRUE(reader.close().ok());
+    EXPECT_FALSE(reader.has_io_context());
+    EXPECT_TRUE(weak_io_ctx.expired());  // after close() no owner of the context remains
+}
+
+class NewParquetReaderTest : public testing::Test {
+protected:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_parquet_reader_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "reader.parquet").string();
+        write_parquet_file(_file_path);
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+    std::unique_ptr<format::parquet::ParquetReader> create_reader(
+            int64_t range_start_offset = 0, int64_t range_size = -1,
+            RuntimeProfile* profile = nullptr, bool enable_mapping_timestamp_tz = false,
+            std::shared_ptr<io::IOContext> io_ctx = nullptr,
+            std::optional<format::GlobalRowIdContext> global_rowid_context = std::nullopt) const {
+        auto system_properties = std::make_shared<io::FileSystemProperties>();
+        system_properties->system_type = TFileType::FILE_LOCAL;
+        auto file_description = std::make_unique<io::FileDescription>();
+        file_description->path = _file_path;
+        file_description->file_size = static_cast<int64_t>(std::filesystem::file_size(_file_path));
+        file_description->range_start_offset = range_start_offset;
+        file_description->range_size = range_size;
+        return std::make_unique<format::parquet::ParquetReader>(
+                system_properties, file_description, std::move(io_ctx), profile,
+                global_rowid_context, enable_mapping_timestamp_tz);
+    }
+
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+};
+
+TEST_F(NewParquetReaderTest, GetSchemaReturnsFileLocalColumns) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 2);
+    EXPECT_EQ(schema[0].local_id, 0);
+    EXPECT_EQ(schema[0].name, "id");
+    ASSERT_TRUE(schema[0].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[0].type)->get_primitive_type(), TYPE_INT);
+    EXPECT_EQ(schema[1].local_id, 1);
+    EXPECT_EQ(schema[1].name, "value");
+    ASSERT_TRUE(schema[1].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[1].type)->get_primitive_type(), TYPE_STRING);
+}
+
+// Scenario: Parquet is columnar and supports predicate/non-predicate split, nested projection and
+// file-layer pruning hints. The reader declares those scan-request capabilities by choosing
+// ParquetColumnMapper itself.
+TEST_F(NewParquetReaderTest, CreatesParquetColumnMapper) {
+    auto reader = create_reader();
+    auto mapper =
+            reader->create_column_mapper({.mode = format::TableColumnMappingMode::BY_FIELD_ID});
+
+    ASSERT_NE(dynamic_cast<format::ParquetColumnMapper*>(mapper.get()), nullptr);
+}
+
+TEST_F(NewParquetReaderTest, CountComplexColumnUsesShapeOnlyPath) {
+    write_nullable_map_parquet_file(_file_path);
+    RuntimeProfile profile("count_map_shape_only_path");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    ASSERT_TRUE(reader->open(std::make_shared<format::FileScanRequest>()).ok());
+
+    format::FileAggregateRequest request;
+    request.agg_type = TPushAggOp::type::COUNT;
+    request.columns.push_back(
+            {.projection = format::LocalColumnIndex::top_level(format::LocalColumnId(0))});
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(request, &result).ok());
+
+    // Rows are: non-empty map, NULL map, empty map, non-empty map with large value string,
+    // non-empty map with NULL value. COUNT(map) excludes only the top-level NULL map.
+    EXPECT_EQ(result.count, 4);
+    ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr);
+    EXPECT_EQ(profile.get_counter("MaterializationTime")->value(), 0);  // nothing materialized
+}
+
+TEST_F(NewParquetReaderTest, CountArrayColumnUsesLevelsOnlyPath) {
+    write_nullable_string_list_parquet_file(_file_path);
+    RuntimeProfile profile("count_array_levels_only_path");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    ASSERT_TRUE(reader->open(std::make_shared<format::FileScanRequest>()).ok());
+
+    format::FileAggregateRequest request;
+    request.agg_type = TPushAggOp::type::COUNT;
+    request.columns.push_back(
+            {.projection = format::LocalColumnIndex::top_level(format::LocalColumnId(0))});
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(request, &result).ok());
+
+    // Rows are: non-empty array with a large string, NULL array, empty array, non-empty array
+    // with NULL element, non-empty array with a large string. Only the top-level NULL is excluded.
+    EXPECT_EQ(result.count, 4);
+    ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr);
+    EXPECT_EQ(profile.get_counter("MaterializationTime")->value(), 0);
+}
+
+TEST_F(NewParquetReaderTest, CountStructColumnUsesLevelsOnlyPath) {
+    write_nullable_string_struct_parquet_file(_file_path);
+    RuntimeProfile profile("count_struct_levels_only_path");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    ASSERT_TRUE(reader->open(std::make_shared<format::FileScanRequest>()).ok());
+
+    format::FileAggregateRequest request;
+    request.agg_type = TPushAggOp::type::COUNT;
+    request.columns.push_back(
+            {.projection = format::LocalColumnIndex::top_level(format::LocalColumnId(0))});
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(request, &result).ok());
+
+    // The representative STRUCT leaf is the first child, a nullable STRING payload. A row with
+    // NULL payload but non-NULL struct still counts; only the top-level NULL struct is excluded.
+    EXPECT_EQ(result.count, 4);
+    ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr);
+    EXPECT_EQ(profile.get_counter("MaterializationTime")->value(), 0);
+}
+
+TEST_F(NewParquetReaderTest, GetSchemaReturnsNullableNestedChildren) {
+    write_struct_filter_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 1);
+    EXPECT_EQ(schema[0].name, "s");
+    ASSERT_TRUE(schema[0].type->is_nullable());
+    ASSERT_EQ(schema[0].children.size(), 2);
+    EXPECT_EQ(schema[0].children[0].name, "id");
+    ASSERT_TRUE(schema[0].children[0].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[0].children[0].type)->get_primitive_type(), TYPE_INT);
+    EXPECT_EQ(schema[0].children[1].name, "name");
+    ASSERT_TRUE(schema[0].children[1].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[0].children[1].type)->get_primitive_type(), TYPE_STRING);
+
+    const auto* struct_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(schema[0].type).get());
+    ASSERT_EQ(struct_type->get_elements().size(), 2);
+    EXPECT_TRUE(struct_type->get_element(0)->is_nullable());
+    EXPECT_TRUE(struct_type->get_element(1)->is_nullable());
+}
+
+TEST_F(NewParquetReaderTest, GetSchemaMapsInt96ToTimestampTzWhenTimestampTzMappingEnabled) {
+    write_int96_timestamp_parquet_file(_file_path);
+    auto reader = create_reader(0, -1, nullptr, true);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 1);
+    EXPECT_EQ(schema[0].name, "ts_tz");
+    ASSERT_TRUE(schema[0].type->is_nullable());
+    EXPECT_EQ(remove_nullable(schema[0].type)->get_primitive_type(), TYPE_TIMESTAMPTZ);
+    EXPECT_EQ(remove_nullable(schema[0].type)->get_scale(), 6);
+}
+
+TEST_F(NewParquetReaderTest, ReadSingleRowGroupThenEof) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0), field_projection(1)};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, ROW_COUNT);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& values = nullable_nested_column<ColumnString>(block, 1);
+    ASSERT_EQ(ids.size(), ROW_COUNT);
+    ASSERT_EQ(values.size(), ROW_COUNT);
+    EXPECT_EQ(ids.get_element(0), 1);
+    EXPECT_EQ(ids.get_element(4), 5);
+    EXPECT_EQ(values.get_data_at(0).to_string(), "one");
+    EXPECT_EQ(values.get_data_at(4).to_string(), "five");
+
+    rows = 0;
+    eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_TRUE(eof);
+    EXPECT_EQ(rows, 0);
+}
+
+TEST_F(NewParquetReaderTest, RespectsConfiguredBatchSize) {
+    auto reader = create_reader();
+    reader->set_batch_size(1);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0), field_projection(1)};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    for (int32_t expected_id = 1; expected_id <= ROW_COUNT; ++expected_id) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        bool eof = false;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        EXPECT_FALSE(eof);
+        ASSERT_EQ(rows, 1);
+        const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+        ASSERT_EQ(ids.size(), 1);
+        EXPECT_EQ(ids.get_element(0), expected_id);
+    }
+
+    Block block = build_file_block(schema);
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_TRUE(eof);
+    EXPECT_EQ(rows, 0);
+}
+
+TEST_F(NewParquetReaderTest, ConditionCacheMissMarksSurvivingGranules) {
+    write_condition_cache_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 1);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(
+            create_int32_greater_than_conjunct(0, ConditionCacheContext::GRANULE_SIZE - 1));
+    use_schema_order_positions(request.get(), schema);
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto ctx = std::make_shared<ConditionCacheContext>();
+    ctx->is_hit = false;
+    ctx->filter_result = std::make_shared<std::vector<bool>>(3, false);
+    reader->set_condition_cache_context(ctx);
+
+    std::vector<int32_t> ids;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+        }
+    }
+
+    ASSERT_EQ(ids.size(), ConditionCacheContext::GRANULE_SIZE);
+    EXPECT_EQ(ids.front(), ConditionCacheContext::GRANULE_SIZE);
+    EXPECT_EQ(ids.back(), ConditionCacheContext::GRANULE_SIZE * 2 - 1);
+    EXPECT_FALSE((*ctx->filter_result)[0]);
+    EXPECT_TRUE((*ctx->filter_result)[1]);
+    EXPECT_FALSE((*ctx->filter_result)[2]);
+}
+
+TEST_F(NewParquetReaderTest, ConditionCacheHitSkipsFalseGranulesBeforeColumnRead) {
+    write_condition_cache_parquet_file(_file_path);
+    auto io_ctx = std::make_shared<io::IOContext>();
+    auto reader = create_reader(0, -1, nullptr, false, io_ctx);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 1);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(
+            create_int32_greater_than_conjunct(0, ConditionCacheContext::GRANULE_SIZE - 1));
+    use_schema_order_positions(request.get(), schema);
+    ASSERT_TRUE(reader->open(request).ok());
+
+    auto ctx = std::make_shared<ConditionCacheContext>();
+    ctx->is_hit = true;
+    ctx->filter_result =
+            std::make_shared<std::vector<bool>>(std::vector<bool> {false, true, false});
+    reader->set_condition_cache_context(ctx);
+
+    Block block = build_file_block(schema);
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, ConditionCacheContext::GRANULE_SIZE);
+    EXPECT_EQ(io_ctx->condition_cache_filtered_rows, ConditionCacheContext::GRANULE_SIZE);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    EXPECT_EQ(ids.get_element(0), ConditionCacheContext::GRANULE_SIZE);
+    EXPECT_EQ(ids.get_element(rows - 1), ConditionCacheContext::GRANULE_SIZE * 2 - 1);
+
+    block = build_file_block(schema);
+    rows = 0;
+    eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_TRUE(eof);
+    EXPECT_EQ(rows, 0);
+}
+
+TEST_F(NewParquetReaderTest, ReadMultipleRowGroups) {
+    write_parquet_file(_file_path, 2);  // overwrite the fixture file written by SetUp()
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0), field_projection(1)};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {  // drain the reader, accumulating the rows of every returned batch
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {  // nothing to collect from this batch
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 3, 4, 5}));
+    EXPECT_EQ(values, std::vector<std::string>({"one", "two", "three", "four", "five"}));
+}
+
+TEST_F(NewParquetReaderTest, RewriteSameLocalPathDoesNotReuseUnknownMtimePageCache) {
+    RuntimeProfile first_profile("new_parquet_reader_first_unknown_mtime");
+    {
+        auto reader = create_reader(0, -1, &first_profile);
+        RuntimeState state {TQueryOptions(), TQueryGlobals()};
+        ASSERT_TRUE(reader->init(&state).ok());
+
+        std::vector<format::ColumnDefinition> schema;
+        ASSERT_TRUE(reader->get_schema(&schema).ok());
+        auto request = std::make_shared<format::FileScanRequest>();
+        request->non_predicate_columns = {field_projection(0), field_projection(1)};
+        ASSERT_TRUE(reader->open(request).ok());
+
+        bool eof = false;
+        while (!eof) {
+            Block block = build_file_block(schema);
+            size_t rows = 0;
+            ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        }
+    }
+
+    ASSERT_NE(first_profile.get_counter("PageReadCount"), nullptr);
+    ASSERT_NE(first_profile.get_counter("PageCacheWriteCount"), nullptr);
+    EXPECT_EQ(first_profile.get_counter("PageReadCount")->value(), 0);
+    EXPECT_EQ(first_profile.get_counter("PageCacheWriteCount")->value(), 0);
+
+    // LocalFileReader reports mtime as 0. Rewriting the same path must not reuse page-cache bytes
+    // from the previous physical file, even when the query option enables parquet file page cache.
+    write_int_pair_parquet_file(_file_path);
+    RuntimeProfile second_profile("new_parquet_reader_second_unknown_mtime");
+    auto reader = create_reader(0, -1, &second_profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0), field_projection(1)};
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<int32_t> scores;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& score_column = nullable_nested_column<ColumnInt32>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            scores.push_back(score_column.get_element(row));
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 3, 4, 5}));
+    EXPECT_EQ(scores, std::vector<int32_t>({1, 2, 3, 4, 5}));
+    ASSERT_NE(second_profile.get_counter("PageReadCount"), nullptr);
+    ASSERT_NE(second_profile.get_counter("PageCacheWriteCount"), nullptr);
+    EXPECT_EQ(second_profile.get_counter("PageReadCount")->value(), 0);
+    EXPECT_EQ(second_profile.get_counter("PageCacheWriteCount")->value(), 0);
+}
+
+TEST_F(NewParquetReaderTest, ReadPredicateAndNonPredicateColumnsWithSelection) {
+    RuntimeProfile profile("new_parquet_reader_filter_profile");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block(schema);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(2), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& values = nullable_nested_column<ColumnString>(block, 1);
+    ASSERT_EQ(ids.size(), 3);
+    ASSERT_EQ(values.size(), 3);
+    EXPECT_EQ(ids.get_element(0), 3);
+    EXPECT_EQ(ids.get_element(1), 4);
+    EXPECT_EQ(ids.get_element(2), 5);
+    EXPECT_EQ(values.get_data_at(0).to_string(), "three");
+    EXPECT_EQ(values.get_data_at(1).to_string(), "four");
+    EXPECT_EQ(values.get_data_at(2).to_string(), "five");
+
+    ASSERT_NE(profile.get_counter("FileReaderCreateTime"), nullptr);
+    ASSERT_NE(profile.get_counter("FileNum"), nullptr);
+    ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr);
+    ASSERT_NE(profile.get_counter("SelectedRows"), nullptr);
+    ASSERT_NE(profile.get_counter("RowsFilteredByConjunct"), nullptr);
+    ASSERT_NE(profile.get_counter("TotalBatches"), nullptr);
+    ASSERT_NE(profile.get_counter("EmptySelectionBatches"), nullptr);
+    ASSERT_NE(profile.get_counter("ReaderReadRows"), nullptr);
+    ASSERT_NE(profile.get_counter("ReaderSkipRows"), nullptr);
+    ASSERT_NE(profile.get_counter("ReaderSelectRows"), nullptr);
+    ASSERT_NE(profile.get_counter("ArrowReadRecordsTime"), nullptr);
+    ASSERT_NE(profile.get_counter("MaterializationTime"), nullptr);
+    ASSERT_GT(profile.get_counter("FileReaderCreateTime")->value(), 0);
+    EXPECT_EQ(profile.get_counter("FileNum")->value(), 1);
+    EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), ROW_COUNT);
+    EXPECT_EQ(profile.get_counter("SelectedRows")->value(), 3);
+    EXPECT_EQ(profile.get_counter("RowsFilteredByConjunct")->value(), 2);
+    EXPECT_EQ(profile.get_counter("TotalBatches")->value(), 1);
+    EXPECT_EQ(profile.get_counter("EmptySelectionBatches")->value(), 0);
+    EXPECT_EQ(profile.get_counter("ReaderReadRows")->value(), ROW_COUNT + 3);
+    EXPECT_EQ(profile.get_counter("ReaderSkipRows")->value(), 2);
+    EXPECT_EQ(profile.get_counter("ReaderSelectRows")->value(), 3);
+    EXPECT_GT(profile.get_counter("ArrowReadRecordsTime")->value(), 0);
+    EXPECT_GT(profile.get_counter("MaterializationTime")->value(), 0);
+
+    rows = 0;
+    eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_TRUE(eof);
+    EXPECT_EQ(rows, 0);
+}
+
+// A reader built with a GlobalRowIdContext must expose a trailing GLOBAL_ROWID column whose
+// values carry each selected row's position in the file, not its index in the output block.
+TEST_F(NewParquetReaderTest, GlobalRowIdSchemaAndSelectionUseFileRowPosition) {
+    format::GlobalRowIdContext rowid_ctx {.version = 7, .backend_id = 123456789, .file_id = 42};
+    auto parquet_reader = create_reader(0, -1, nullptr, false, nullptr, rowid_ctx);
+    RuntimeState runtime_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(parquet_reader->init(&runtime_state).ok());
+    std::vector<format::ColumnDefinition> columns;
+    ASSERT_TRUE(parquet_reader->get_schema(&columns).ok());
+    ASSERT_EQ(columns.size(), 3);
+    EXPECT_EQ(columns[2].local_id, format::GLOBAL_ROWID_COLUMN_ID);
+    EXPECT_EQ(columns[2].column_type, format::GLOBAL_ROWID);
+    Block out_block = build_file_block(columns);
+    // Column 0 feeds the conjunct; column 1 and the row id are non-predicate columns.
+    auto scan_request = std::make_shared<format::FileScanRequest>();
+    scan_request->predicate_columns = {field_projection(0)};
+    scan_request->non_predicate_columns = {field_projection(1),
+                                           field_projection(format::GLOBAL_ROWID_COLUMN_ID)};
+    scan_request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    use_schema_order_positions(scan_request.get(), columns);
+    ASSERT_TRUE(parquet_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t row_count = 0;
+    ASSERT_TRUE(parquet_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    EXPECT_FALSE(reached_eof);
+    ASSERT_EQ(row_count, 3);
+    const auto& id_col = nullable_nested_column<ColumnInt32>(out_block, 0);
+    const auto& value_col = nullable_nested_column<ColumnString>(out_block, 1);
+    const auto& rowid_col = assert_cast<const ColumnString&>(*out_block.get_by_position(2).column);
+    ASSERT_EQ(id_col.size(), 3);
+    ASSERT_EQ(value_col.size(), 3);
+    ASSERT_EQ(rowid_col.size(), 3);
+    EXPECT_EQ(id_col.get_element(0), 3);
+    EXPECT_EQ(id_col.get_element(1), 4);
+    EXPECT_EQ(id_col.get_element(2), 5);
+    EXPECT_EQ(value_col.get_data_at(0).to_string(), "three");
+    EXPECT_EQ(value_col.get_data_at(1).to_string(), "four");
+    EXPECT_EQ(value_col.get_data_at(2).to_string(), "five");
+    // Output row i must decode to file row i + 2 together with the context's identifiers.
+    for (size_t i = 0; i < row_count; ++i) {
+        const auto decoded = decode_rowid(rowid_col, i);
+        EXPECT_EQ(decoded.version, rowid_ctx.version);
+        EXPECT_EQ(decoded.backend_id, rowid_ctx.backend_id);
+        EXPECT_EQ(decoded.file_id, rowid_ctx.file_id);
+        EXPECT_EQ(decoded.row_id, static_cast<uint32_t>(i + 2));
+    }
+}
+
+// Column predicate filters alone may prune but must not drop rows: all ROW_COUNT rows come back.
+TEST_F(NewParquetReaderTest, ColumnPredicateOnlyPrunesAndDoesNotFilterRowsInsideRowGroup) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    // schema[0] and block column 1 are used below; fail cleanly instead of indexing out of range.
+    ASSERT_GE(schema.size(), 2);
+    Block block = build_file_block(schema);
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(2), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, ROW_COUNT);
+    const auto& ids = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& values = nullable_nested_column<ColumnString>(block, 1);
+    ASSERT_EQ(ids.size(), ROW_COUNT);
+    ASSERT_EQ(values.size(), ROW_COUNT);
+    EXPECT_EQ(ids.get_element(0), 1);
+    EXPECT_EQ(ids.get_element(4), 5);
+    EXPECT_EQ(values.get_data_at(0).to_string(), "one");
+    EXPECT_EQ(values.get_data_at(4).to_string(), "five");
+}
+
+// A conjunct that rejects every row must end the scan with an empty batch, while the profile
+// still accounts for all rows as read and as filtered by the conjunct.
+TEST_F(NewParquetReaderTest, EmptySelectionUpdatesProfileCounters) {
+    RuntimeProfile scan_profile("new_parquet_reader_empty_selection_profile");
+    auto parquet_reader = create_reader(0, -1, &scan_profile);
+    RuntimeState runtime_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(parquet_reader->init(&runtime_state).ok());
+    std::vector<format::ColumnDefinition> columns;
+    ASSERT_TRUE(parquet_reader->get_schema(&columns).ok());
+    Block out_block = build_file_block(columns);
+    auto scan_request = std::make_shared<format::FileScanRequest>();
+    scan_request->predicate_columns = {field_projection(0)};
+    scan_request->non_predicate_columns = {field_projection(1)};
+    scan_request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 10));
+    use_schema_order_positions(scan_request.get(), columns);
+    ASSERT_TRUE(parquet_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t row_count = 0;
+    ASSERT_TRUE(parquet_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    EXPECT_TRUE(reached_eof);
+    EXPECT_EQ(row_count, 0);
+    // Assert that each counter exists before dereferencing it for the value checks.
+    ASSERT_NE(scan_profile.get_counter("RawRowsRead"), nullptr);
+    ASSERT_NE(scan_profile.get_counter("SelectedRows"), nullptr);
+    ASSERT_NE(scan_profile.get_counter("RowsFilteredByConjunct"), nullptr);
+    ASSERT_NE(scan_profile.get_counter("TotalBatches"), nullptr);
+    ASSERT_NE(scan_profile.get_counter("EmptySelectionBatches"), nullptr);
+    EXPECT_EQ(scan_profile.get_counter("RawRowsRead")->value(), ROW_COUNT);
+    EXPECT_EQ(scan_profile.get_counter("SelectedRows")->value(), 0);
+    EXPECT_EQ(scan_profile.get_counter("RowsFilteredByConjunct")->value(), ROW_COUNT);
+    EXPECT_EQ(scan_profile.get_counter("TotalBatches")->value(), 1);
+    EXPECT_EQ(scan_profile.get_counter("EmptySelectionBatches")->value(), 1);
+}
+
+// Both projected columns are predicate columns, so both must be read before the two-column
+// conjunct runs; only the rows (4, 4) and (5, 5) are expected to pass it.
+TEST_F(NewParquetReaderTest, ReadMultiPredicateColumnsBeforeExpressionFilter) {
+    write_int_pair_parquet_file(_file_path);
+    auto parquet_reader = create_reader();
+    RuntimeState runtime_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(parquet_reader->init(&runtime_state).ok());
+    std::vector<format::ColumnDefinition> columns;
+    ASSERT_TRUE(parquet_reader->get_schema(&columns).ok());
+    Block out_block = build_file_block(columns);
+    // No non-predicate columns are requested; the conjunct is built over columns 0 and 1.
+    auto scan_request = std::make_shared<format::FileScanRequest>();
+    scan_request->predicate_columns = {field_projection(0), field_projection(1)};
+    scan_request->non_predicate_columns = {};
+    scan_request->conjuncts.push_back(create_int32_sum_greater_than_conjunct(0, 1, 7));
+    ASSERT_TRUE(parquet_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t row_count = 0;
+    ASSERT_TRUE(parquet_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    EXPECT_FALSE(reached_eof);
+    ASSERT_EQ(row_count, 2);
+    const auto& id_col = nullable_nested_column<ColumnInt32>(out_block, 0);
+    const auto& score_col = nullable_nested_column<ColumnInt32>(out_block, 1);
+    ASSERT_EQ(id_col.size(), 2);
+    ASSERT_EQ(score_col.size(), 2);
+    EXPECT_EQ(id_col.get_element(0), 4);
+    EXPECT_EQ(id_col.get_element(1), 5);
+    EXPECT_EQ(score_col.get_element(0), 4);
+    EXPECT_EQ(score_col.get_element(1), 5);
+}
+
+// The string column is a non-predicate column read alongside a filtered id column; its values
+// must line up with the surviving ids: (3, "three"), (4, "four") and (5, "five").
+TEST_F(NewParquetReaderTest, PredicateColumnFiltersBeforeNonPredicateRead) {
+    auto parquet_reader = create_reader();
+    RuntimeState runtime_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(parquet_reader->init(&runtime_state).ok());
+    std::vector<format::ColumnDefinition> columns;
+    ASSERT_TRUE(parquet_reader->get_schema(&columns).ok());
+    Block out_block = build_file_block(columns);
+    // Column 0 is the predicate column for the conjunct; column 1 is read as non-predicate.
+    auto scan_request = std::make_shared<format::FileScanRequest>();
+    scan_request->predicate_columns = {field_projection(0)};
+    scan_request->non_predicate_columns = {field_projection(1)};
+    scan_request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    ASSERT_TRUE(parquet_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t row_count = 0;
+    ASSERT_TRUE(parquet_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    EXPECT_FALSE(reached_eof);
+    ASSERT_EQ(row_count, 3);
+    const auto& id_col = nullable_nested_column<ColumnInt32>(out_block, 0);
+    const auto& value_col = nullable_nested_column<ColumnString>(out_block, 1);
+    ASSERT_EQ(id_col.size(), 3);
+    ASSERT_EQ(value_col.size(), 3);
+    EXPECT_EQ(id_col.get_element(0), 3);
+    EXPECT_EQ(id_col.get_element(1), 4);
+    EXPECT_EQ(id_col.get_element(2), 5);
+    EXPECT_EQ(value_col.get_data_at(0).to_string(), "three");
+    EXPECT_EQ(value_col.get_data_at(1).to_string(), "four");
+    EXPECT_EQ(value_col.get_data_at(2).to_string(), "five");
+}
+
+// On the int-pair file the non-predicate score column must follow the selection computed on
+// the id column: scores 3, 4 and 5 are expected next to ids 3, 4 and 5.
+TEST_F(NewParquetReaderTest, NonPredicateColumnKeepsSelectionFromPredicateColumn) {
+    write_int_pair_parquet_file(_file_path);
+    auto parquet_reader = create_reader();
+    RuntimeState runtime_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(parquet_reader->init(&runtime_state).ok());
+    std::vector<format::ColumnDefinition> columns;
+    ASSERT_TRUE(parquet_reader->get_schema(&columns).ok());
+    Block out_block = build_file_block(columns);
+    // Column 0 is the predicate column for the conjunct; column 1 is read as non-predicate.
+    auto scan_request = std::make_shared<format::FileScanRequest>();
+    scan_request->predicate_columns = {field_projection(0)};
+    scan_request->non_predicate_columns = {field_projection(1)};
+    scan_request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    ASSERT_TRUE(parquet_reader->open(scan_request).ok());
+
+    bool reached_eof = false;
+    size_t row_count = 0;
+    ASSERT_TRUE(parquet_reader->get_block(&out_block, &row_count, &reached_eof).ok());
+    EXPECT_FALSE(reached_eof);
+    ASSERT_EQ(row_count, 3);
+    const auto& id_col = nullable_nested_column<ColumnInt32>(out_block, 0);
+    const auto& score_col = nullable_nested_column<ColumnInt32>(out_block, 1);
+    ASSERT_EQ(id_col.size(), 3);
+    ASSERT_EQ(score_col.size(), 3);
+    EXPECT_EQ(id_col.get_element(0), 3);
+    EXPECT_EQ(id_col.get_element(1), 4);
+    EXPECT_EQ(id_col.get_element(2), 5);
+    EXPECT_EQ(score_col.get_element(0), 3);
+    EXPECT_EQ(score_col.get_element(1), 4);
+    EXPECT_EQ(score_col.get_element(2), 5);
+}
+
+// A conjunct plus a matching column predicate on a three-row-group file must yield ids 3..5 only.
+TEST_F(NewParquetReaderTest, PredicateFiltersRowGroupsByStatistics) {
+    write_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    // schema[0] and block column 1 are used below; fail cleanly instead of indexing out of range.
+    ASSERT_GE(schema.size(), 2);
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(2), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+    EXPECT_EQ(ids, std::vector<int32_t>({3, 4, 5}));
+    EXPECT_EQ(values, std::vector<std::string>({"three", "four", "five"}));
+}
+
+// Planner and reader must both keep only the row group whose dictionary can contain "lm".
+TEST_F(NewParquetReaderTest, PredicateFiltersRowGroupsByDictionary) {
+    write_dictionary_filter_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 6);
+    // Precondition: every value chunk has a dictionary page and no min/max statistics.
+    for (int row_group_idx = 0; row_group_idx < 6; ++row_group_idx) {
+        auto row_group = parquet_file_reader->metadata()->RowGroup(row_group_idx);
+        ASSERT_NE(row_group, nullptr);
+        auto value_chunk = row_group->ColumnChunk(1);
+        ASSERT_NE(value_chunk, nullptr);
+        ASSERT_TRUE(value_chunk->has_dictionary_page());
+        ASSERT_TRUE(value_chunk->statistics() == nullptr ||
+                    !value_chunk->statistics()->HasMinMax());
+    }
+    // Part 1: plan directly against the file metadata with an EQ "lm" predicate on column 1.
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    auto schema_descriptor = parquet_file_reader->metadata()->schema();
+    ASSERT_NE(schema_descriptor, nullptr);
+    ASSERT_TRUE(
+            format::parquet::build_parquet_column_schema(*schema_descriptor, &file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 2);
+    format::FileScanRequest plan_request;
+    format::FileColumnPredicateFilter plan_column_filter;
+    plan_column_filter.file_column_id = format::LocalColumnId(1);
+    auto value_type = std::make_shared<DataTypeString>();
+    plan_column_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            1, "value", value_type, Field::create_field<TYPE_STRING>("lm"), false));
+    plan_request.column_predicate_filters.push_back(std::move(plan_column_filter));
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         plan_request, scan_range, false, &plan)
+                        .ok());
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 6);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 5);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 5);
+    EXPECT_EQ(plan.pruning_stats.selected_row_ranges, 1);
+    // Part 2: read through the reader with the same predicate plus an IN ("lm") conjunct.
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    // schema[1] is dereferenced below; fail cleanly instead of indexing out of range.
+    ASSERT_GE(schema.size(), 2);
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(1)};
+    request->non_predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(create_string_in_conjunct(1, {"lm"}));
+    use_schema_order_positions(request.get(), schema);
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(1);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            1, "value", schema[1].type, Field::create_field<TYPE_STRING>("lm"), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+    EXPECT_EQ(ids, std::vector<int32_t>({3}));
+    EXPECT_EQ(values, std::vector<std::string>({"lm"}));
+}
+
+// A scan range that selects row group 2 must be applied before dictionary pruning, so row groups
+// outside the range are not reported as dictionary-filtered.
+TEST_F(NewParquetReaderTest, ScanRangeFiltersRowGroupsBeforeDictionaryPruning) {
+    write_dictionary_filter_parquet_file(_file_path);
+    auto file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(file_reader->metadata()->num_row_groups(), 6);
+    auto descriptor = file_reader->metadata()->schema();
+    ASSERT_NE(descriptor, nullptr);
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> parsed_schema;
+    ASSERT_TRUE(format::parquet::build_parquet_column_schema(*descriptor, &parsed_schema).ok());
+    // Column predicate: column 1 ("value") EQ "lm".
+    auto value_type = std::make_shared<DataTypeString>();
+    format::FileColumnPredicateFilter value_filter;
+    value_filter.file_column_id = format::LocalColumnId(1);
+    value_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            1, "value", value_type, Field::create_field<TYPE_STRING>("lm"), false));
+    format::FileScanRequest plan_request;
+    plan_request.column_predicate_filters.push_back(std::move(value_filter));
+
+    const auto [mid_offset, mid_size] = row_group_mid_range(_file_path, 2);
+    format::parquet::ParquetScanRange split_range;
+    split_range.start_offset = mid_offset;
+    split_range.size = mid_size;
+    split_range.file_size = static_cast<int64_t>(std::filesystem::file_size(_file_path));
+
+    format::parquet::RowGroupScanPlan plan;
+    auto plan_status = format::parquet::plan_parquet_row_groups(
+            *file_reader->metadata(), file_reader.get(), parsed_schema, plan_request, split_range,
+            false, &plan);
+    ASSERT_TRUE(plan_status.ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].row_group_id, 2);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 6);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 0);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 0);
+}
+
+// A predicate on the nested struct child "id" (child path {0}) must prune row groups through
+// column statistics: of two row groups only row group 1 is expected to survive "id > 5".
+TEST_F(NewParquetReaderTest, NestedStructPredicateFiltersRowGroupsByStatistics) {
+    write_struct_filter_parquet_file(_file_path);
+    auto file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(file_reader->metadata()->num_row_groups(), 2);
+    auto descriptor = file_reader->metadata()->schema();
+    ASSERT_NE(descriptor, nullptr);
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> parsed_schema;
+    ASSERT_TRUE(format::parquet::build_parquet_column_schema(*descriptor, &parsed_schema).ok());
+    ASSERT_EQ(parsed_schema.size(), 1);
+    ASSERT_EQ(parsed_schema[0]->children.size(), 2);
+    ASSERT_EQ(parsed_schema[0]->children[0]->name, "id");
+    // The child is addressed through the top-level column id plus its child path.
+    auto id_type = std::make_shared<DataTypeInt32>();
+    format::FileColumnPredicateFilter id_filter;
+    id_filter.file_column_id = format::LocalColumnId(0);
+    id_filter.file_child_id_path = {0};
+    id_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", id_type, Field::create_field<TYPE_INT>(5), false));
+    format::FileScanRequest plan_request;
+    plan_request.column_predicate_filters.push_back(std::move(id_filter));
+
+    format::parquet::ParquetScanRange default_range;
+    format::parquet::RowGroupScanPlan plan;
+    auto plan_status = format::parquet::plan_parquet_row_groups(
+            *file_reader->metadata(), file_reader.get(), parsed_schema, plan_request,
+            default_range, false, &plan);
+    ASSERT_TRUE(plan_status.ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].row_group_id, 1);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 2);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 2);
+}
+
+// A predicate on the nested struct child "name" (child path {1}) must prune row groups through
+// dictionaries: of six row groups only row group 2 is expected to survive name EQ "lm".
+TEST_F(NewParquetReaderTest, NestedStructPredicateFiltersRowGroupsByDictionary) {
+    write_nested_dictionary_filter_parquet_file(_file_path);
+    auto file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(file_reader->metadata()->num_row_groups(), 6);
+    // Precondition: every chunk of leaf column 1 has a dictionary page and no min/max statistics.
+    for (int rg = 0; rg < 6; ++rg) {
+        auto rg_meta = file_reader->metadata()->RowGroup(rg);
+        ASSERT_NE(rg_meta, nullptr);
+        auto chunk = rg_meta->ColumnChunk(1);
+        ASSERT_NE(chunk, nullptr);
+        ASSERT_TRUE(chunk->has_dictionary_page());
+        ASSERT_TRUE(chunk->statistics() == nullptr || !chunk->statistics()->HasMinMax());
+    }
+    auto descriptor = file_reader->metadata()->schema();
+    ASSERT_NE(descriptor, nullptr);
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> parsed_schema;
+    ASSERT_TRUE(format::parquet::build_parquet_column_schema(*descriptor, &parsed_schema).ok());
+    ASSERT_EQ(parsed_schema.size(), 1);
+    ASSERT_EQ(parsed_schema[0]->children.size(), 2);
+    ASSERT_EQ(parsed_schema[0]->children[1]->name, "name");
+    auto name_type = std::make_shared<DataTypeString>();
+    format::FileColumnPredicateFilter name_filter;
+    name_filter.file_column_id = format::LocalColumnId(0);
+    name_filter.file_child_id_path = {1};
+    name_filter.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            0, "name", name_type, Field::create_field<TYPE_STRING>("lm"), false));
+    format::FileScanRequest plan_request;
+    plan_request.column_predicate_filters.push_back(std::move(name_filter));
+
+    format::parquet::ParquetScanRange default_range;
+    format::parquet::RowGroupScanPlan plan;
+    auto plan_status = format::parquet::plan_parquet_row_groups(
+            *file_reader->metadata(), file_reader.get(), parsed_schema, plan_request,
+            default_range, false, &plan);
+    ASSERT_TRUE(plan_status.ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].row_group_id, 2);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 6);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_dictionary, 5);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 5);
+}
+
+// For a single row group with several pages, "id > 63" must be narrowed by the page index: the
+// plan keeps the row group but selects a sub-range and records which pages can be skipped.
+TEST_F(NewParquetReaderTest, PlannerNarrowsRowRangesByPageIndex) {
+    write_page_index_filter_parquet_file(_file_path);
+    auto file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(file_reader->metadata()->num_row_groups(), 1);
+    auto index_reader = file_reader->GetPageIndexReader();
+    ASSERT_NE(index_reader, nullptr);
+    auto rg_index_reader = index_reader->RowGroup(0);
+    ASSERT_NE(rg_index_reader, nullptr);
+    auto column_offsets = rg_index_reader->GetOffsetIndex(0);
+    ASSERT_NE(column_offsets, nullptr);
+    ASSERT_GT(column_offsets->page_locations().size(), 1);
+    auto descriptor = file_reader->metadata()->schema();
+    ASSERT_NE(descriptor, nullptr);
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> parsed_schema;
+    ASSERT_TRUE(format::parquet::build_parquet_column_schema(*descriptor, &parsed_schema).ok());
+    ASSERT_EQ(parsed_schema.size(), 1);
+    auto id_type = std::make_shared<DataTypeInt32>();
+    format::FileColumnPredicateFilter id_filter;
+    id_filter.file_column_id = format::LocalColumnId(0);
+    id_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", id_type, Field::create_field<TYPE_INT>(63), false));
+    format::FileScanRequest plan_request;
+    plan_request.column_predicate_filters.push_back(std::move(id_filter));
+
+    format::parquet::ParquetScanRange default_range;
+    format::parquet::RowGroupScanPlan plan;
+    auto plan_status = format::parquet::plan_parquet_row_groups(
+            *file_reader->metadata(), file_reader.get(), parsed_schema, plan_request,
+            default_range, false, &plan);
+    ASSERT_TRUE(plan_status.ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    ASSERT_FALSE(plan.row_groups[0].selected_ranges.empty());
+    EXPECT_GT(plan.row_groups[0].selected_ranges.front().start, 0);
+    EXPECT_LT(plan.row_groups[0].selected_ranges.front().length, 128);
+    // Leaf column 0 must carry a skip plan whose skipped pages account for compressed bytes.
+    auto skip_it = plan.row_groups[0].page_skip_plans.find(0);
+    ASSERT_NE(skip_it, plan.row_groups[0].page_skip_plans.end());
+    auto& skip_plan = skip_it->second;
+    EXPECT_EQ(skip_plan.leaf_column_id, 0);
+    EXPECT_GT(skip_plan.skipped_ranges.size(), 0);
+    EXPECT_GT(skip_plan.skipped_pages.size(), 1);
+    ASSERT_EQ(skip_plan.skipped_pages.size(), skip_plan.skipped_page_compressed_sizes.size());
+    int64_t skipped_bytes = 0;
+    for (size_t page = 0; page < skip_plan.skipped_pages.size(); ++page) {
+        if (skip_plan.should_skip_page(page)) {
+            skipped_bytes += skip_plan.skipped_page_compressed_size(page);
+        }
+    }
+    EXPECT_GT(skipped_bytes, 0);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 0);
+    EXPECT_GT(plan.pruning_stats.filtered_page_rows, 0);
+    EXPECT_EQ(plan.pruning_stats.selected_row_ranges, plan.row_groups[0].selected_ranges.size());
+}
+
+// "id > 63" on struct child "id" (child path {0}) must be narrowed by the page index: the plan
+// keeps the single row group, selects a sub-range and records which pages can be skipped.
+TEST_F(NewParquetReaderTest, NestedStructPredicateNarrowsRowRangesByPageIndex) {
+    write_nested_page_index_filter_parquet_file(_file_path);
+    auto file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(file_reader->metadata()->num_row_groups(), 1);
+    auto index_reader = file_reader->GetPageIndexReader();
+    ASSERT_NE(index_reader, nullptr);
+    auto rg_index_reader = index_reader->RowGroup(0);
+    ASSERT_NE(rg_index_reader, nullptr);
+    auto column_offsets = rg_index_reader->GetOffsetIndex(0);
+    ASSERT_NE(column_offsets, nullptr);
+    ASSERT_GT(column_offsets->page_locations().size(), 1);
+    auto descriptor = file_reader->metadata()->schema();
+    ASSERT_NE(descriptor, nullptr);
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> parsed_schema;
+    ASSERT_TRUE(format::parquet::build_parquet_column_schema(*descriptor, &parsed_schema).ok());
+    ASSERT_EQ(parsed_schema.size(), 1);
+    ASSERT_EQ(parsed_schema[0]->children.size(), 2);
+    ASSERT_EQ(parsed_schema[0]->children[0]->name, "id");
+    auto id_type = std::make_shared<DataTypeInt32>();
+    format::FileColumnPredicateFilter id_filter;
+    id_filter.file_column_id = format::LocalColumnId(0);
+    id_filter.file_child_id_path = {0};
+    id_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", id_type, Field::create_field<TYPE_INT>(63), false));
+    format::FileScanRequest plan_request;
+    plan_request.column_predicate_filters.push_back(std::move(id_filter));
+
+    format::parquet::ParquetScanRange default_range;
+    format::parquet::RowGroupScanPlan plan;
+    auto plan_status = format::parquet::plan_parquet_row_groups(
+            *file_reader->metadata(), file_reader.get(), parsed_schema, plan_request,
+            default_range, false, &plan);
+    ASSERT_TRUE(plan_status.ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    ASSERT_FALSE(plan.row_groups[0].selected_ranges.empty());
+    EXPECT_GT(plan.row_groups[0].selected_ranges.front().start, 0);
+    EXPECT_LT(plan.row_groups[0].selected_ranges.front().length, 128);
+    // Leaf column 0 must carry a skip plan whose skipped pages account for compressed bytes.
+    auto skip_it = plan.row_groups[0].page_skip_plans.find(0);
+    ASSERT_NE(skip_it, plan.row_groups[0].page_skip_plans.end());
+    auto& skip_plan = skip_it->second;
+    EXPECT_EQ(skip_plan.leaf_column_id, 0);
+    EXPECT_GT(skip_plan.skipped_ranges.size(), 0);
+    EXPECT_GT(skip_plan.skipped_pages.size(), 1);
+    ASSERT_EQ(skip_plan.skipped_pages.size(), skip_plan.skipped_page_compressed_sizes.size());
+    int64_t skipped_bytes = 0;
+    for (size_t page = 0; page < skip_plan.skipped_pages.size(); ++page) {
+        if (skip_plan.should_skip_page(page)) {
+            skipped_bytes += skip_plan.skipped_page_compressed_size(page);
+        }
+    }
+    EXPECT_GT(skipped_bytes, 0);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 0);
+    EXPECT_GT(plan.pruning_stats.filtered_page_rows, 0);
+    EXPECT_EQ(plan.pruning_stats.selected_row_ranges, plan.row_groups[0].selected_ranges.size());
+}
+
+// End-to-end read with page-index pruning on "id > 63": pruned pages must not be skipped a
+// second time for the output column, so payloads stay aligned with ids (payload == id + 1000).
+TEST_F(NewParquetReaderTest, PageIndexFilteredPagesDoNotDoubleSkipOutputColumns) {
+    write_page_index_filter_pair_parquet_file(_file_path);
+    RuntimeProfile profile("new_parquet_reader_page_skip");
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 2);
+
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 63));
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(63), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+    std::vector<int32_t> ids;
+    std::vector<int32_t> payloads;
+    bool eof = false;
+    while (!eof) {
+        // Use a fresh block per batch so rows [0, rows) always belong to the current batch.
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& payload_column = nullable_nested_column<ColumnInt32>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            payloads.push_back(payload_column.get_element(row));
+        }
+    }
+    // 64 rows are read and selected; pruned rows must show up as range gaps, not reader skips.
+    ASSERT_NE(profile.get_counter("PagesSkippedByDataPageFilter"), nullptr);
+    ASSERT_NE(profile.get_counter("DataPageFilterSkipBytes"), nullptr);
+    ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr);
+    ASSERT_NE(profile.get_counter("SelectedRows"), nullptr);
+    ASSERT_NE(profile.get_counter("RangeGapSkippedRows"), nullptr);
+    ASSERT_NE(profile.get_counter("ReaderSkipRows"), nullptr);
+    ASSERT_NE(profile.get_counter("RowGroupFilterTime"), nullptr);
+    ASSERT_NE(profile.get_counter("PageIndexFilterTime"), nullptr);
+    ASSERT_NE(profile.get_counter("PageIndexReadTime"), nullptr);
+    EXPECT_GT(profile.get_counter("PagesSkippedByDataPageFilter")->value(), 0);
+    EXPECT_GT(profile.get_counter("DataPageFilterSkipBytes")->value(), 0);
+    EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), 64);
+    EXPECT_EQ(profile.get_counter("SelectedRows")->value(), 64);
+    EXPECT_GT(profile.get_counter("RangeGapSkippedRows")->value(), 0);
+    EXPECT_EQ(profile.get_counter("ReaderSkipRows")->value(), 0);
+    EXPECT_GT(profile.get_counter("RowGroupFilterTime")->value(), 0);
+    EXPECT_GT(profile.get_counter("PageIndexFilterTime")->value(), 0);
+    EXPECT_GT(profile.get_counter("PageIndexReadTime")->value(), 0);
+    ASSERT_EQ(ids.size(), 64);
+    ASSERT_EQ(payloads.size(), ids.size());
+    for (size_t row = 0; row < ids.size(); ++row) {
+        EXPECT_EQ(ids[row], static_cast<int32_t>(row + 64));
+        EXPECT_EQ(payloads[row], ids[row] + 1000);
+    }
+}
+
+TEST_F(NewParquetReaderTest, InPredicateFiltersRowGroupsByDictionary) {
+    write_dictionary_filter_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    // Column 1 carries the IN ("az", "za") filter; column 0 is a non-predicate projection.
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(1)};
+    request->non_predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(create_string_in_conjunct(1, {"az", "za"}));
+    use_schema_order_positions(request.get(), schema);
+    auto set = build_set<TYPE_STRING>();
+    set->insert(const_cast<char*>("az"), 2);
+    set->insert(const_cast<char*>("za"), 2);
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(1);
+    column_filter.predicates.push_back(create_in_list_predicate<PredicateType::IN_LIST>(
+            1, "value", schema[1].type, set, false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+    // Drain the reader, collecting every returned (id, value) pair across blocks.
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+    // Only the two rows whose value is in the IN list are expected to come back.
+    EXPECT_EQ(ids, std::vector<int32_t>({2, 5}));
+    EXPECT_EQ(values, std::vector<std::string>({"az", "za"}));
+}
+
+TEST_F(NewParquetReaderTest, DictionaryPageV2StringEdgesSurviveSelection) {
+    write_dictionary_edge_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 4);
+    for (int row_group_idx = 0; row_group_idx < 4; ++row_group_idx) {
+        auto row_group = parquet_file_reader->metadata()->RowGroup(row_group_idx);
+        ASSERT_NE(row_group, nullptr);
+        ASSERT_TRUE(row_group->ColumnChunk(1)->has_dictionary_page());
+    }
+    // Precondition checked above: 4 row groups, each with a dictionary page on column 1.
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    // Column 1 carries IN ("", "same"); the empty string is inserted into the set with length 0.
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(1)};
+    request->non_predicate_columns = {field_projection(0)};
+    request->conjuncts.push_back(create_string_in_conjunct(1, {"", "same"}));
+    use_schema_order_positions(request.get(), schema);
+    auto set = build_set<TYPE_STRING>();
+    set->insert(const_cast<char*>(""), 0);
+    set->insert(const_cast<char*>("same"), 4);
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(1);
+    column_filter.predicates.push_back(create_in_list_predicate<PredicateType::IN_LIST>(
+            1, "value", schema[1].type, set, false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+    // Drain the reader, collecting every returned (id, value) pair across blocks.
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+    // Both the empty string and "same" are expected to be returned twice each.
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 5, 7}));
+    EXPECT_EQ(values, std::vector<std::string>({"", "same", "", "same"}));
+}
+
+TEST_F(NewParquetReaderTest, StatisticsPruningSkipsPrefixRowGroupsAndReadsLaterGroups) {
+    write_parquet_file(_file_path, 1);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 5);
+    // Precondition checked above: the file written with argument 1 has 5 row groups.
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    // Filter on column 0: a greater-than conjunct with bound 3 plus the column predicate id >= 4.
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(1)};
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 3));
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(4), false));
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+    // Drain the reader, collecting every returned (id, value) pair across blocks.
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& value_column = nullable_nested_column<ColumnString>(block, 1);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            values.push_back(value_column.get_data_at(row).to_string());
+        }
+    }
+    // Only ids 4 and 5 (values "four" and "five") are expected back.
+    EXPECT_EQ(ids, std::vector<int32_t>({4, 5}));
+    EXPECT_EQ(values, std::vector<std::string>({"four", "five"}));
+}
+
+TEST_F(NewParquetReaderTest, RowPositionReaderReturnsFileLocalPositions) {
+    write_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    // Precondition checked above: the file written with argument 2 has 3 row groups.
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    // Map column 0 to block position 0 and the row-position column to block position 2.
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID),
+                                      field_projection(0)};
+    request->local_positions = {
+            {format::LocalColumnId(0), format::LocalIndex(0)},
+            {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+    };
+    ASSERT_TRUE(reader->open(request).ok());
+    // Drain the reader, collecting ids and their row positions across blocks.
+    std::vector<int64_t> row_positions;
+    std::vector<int32_t> ids;
+    bool eof = false;
+    while (!eof) {
+        Block block = build_file_block_with_row_position(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) {
+            continue;
+        }
+        const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+        const auto& row_position_column =
+                assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            row_positions.push_back(row_position_column.get_element(row));
+        }
+    }
+    // Row positions are expected to run 0..4 continuously across the row groups.
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 3, 4, 5}));
+    EXPECT_EQ(row_positions, std::vector<int64_t>({0, 1, 2, 3, 4}));
+}
+
+TEST_F(NewParquetReaderTest, RowPositionReaderKeepsPositionsAfterSelection) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    // The block is built once up front; only a single get_block() call is made below.
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block_with_row_position(schema);
+    // Column 0 is the predicate column (conjunct with bound 2); row positions are output-only.
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0)};
+    request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID)};
+    request->local_positions = {
+            {format::LocalColumnId(0), format::LocalIndex(0)},
+            {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+    };
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    ASSERT_TRUE(reader->open(request).ok());
+    // The single get_block() call is expected to return the 3 surviving rows without eof.
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+    // Surviving ids 3..5 are expected to carry row positions 2..4.
+    const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& row_position_column =
+            assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    EXPECT_EQ(id_column.get_element(1), 4);
+    EXPECT_EQ(id_column.get_element(2), 5);
+    EXPECT_EQ(row_position_column.get_element(0), 2);
+    EXPECT_EQ(row_position_column.get_element(1), 3);
+    EXPECT_EQ(row_position_column.get_element(2), 4);
+}
+
+TEST_F(NewParquetReaderTest, DeletePredicateFiltersRowPositions) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    // The block is built with a row-position column, which is read back from position 2 below.
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block_with_row_position(schema);
+    // Delete predicate over row positions 1 and 3, with the row-position slot as its child.
+    static const std::vector<int64_t> deleted_rows {1, 3};
+    auto delete_predicate = std::make_shared<format::DeletePredicate>(deleted_rows);
+    delete_predicate->add_child(VSlotRef::create_shared(2, 2, -1, std::make_shared<DataTypeInt64>(),
+                                                        format::ROW_POSITION_COLUMN_NAME));
+    // The row-position column is the predicate column here; column 0 is output-only.
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID)};
+    request->non_predicate_columns = {field_projection(0)};
+    request->local_positions = {
+            {format::LocalColumnId(0), format::LocalIndex(0)},
+            {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+    };
+    request->delete_conjuncts.push_back(VExprContext::create_shared(std::move(delete_predicate)));
+    ASSERT_TRUE(reader->open(request).ok());
+    // The single get_block() call is expected to return the 3 non-deleted rows without eof.
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 3);
+    // Positions 1 and 3 are dropped; ids 1, 3, 5 are expected at positions 0, 2, 4.
+    const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& row_position_column =
+            assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 3);
+    EXPECT_EQ(id_column.get_element(2), 5);
+    EXPECT_EQ(row_position_column.get_element(0), 0);
+    EXPECT_EQ(row_position_column.get_element(1), 2);
+    EXPECT_EQ(row_position_column.get_element(2), 4);
+}
+
+TEST_F(NewParquetReaderTest, QueryPredicateAndDeletePredicateFilterRowPositions) {
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    // The block is built with a row-position column, which is read back from position 2 below.
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    Block block = build_file_block_with_row_position(schema);
+    // Delete predicate over row position 3 only, with the row-position slot as its child.
+    static const std::vector<int64_t> deleted_rows {3};
+    auto delete_predicate = std::make_shared<format::DeletePredicate>(deleted_rows);
+    delete_predicate->add_child(VSlotRef::create_shared(2, 2, -1, std::make_shared<DataTypeInt64>(),
+                                                        format::ROW_POSITION_COLUMN_NAME));
+    // Column 0 and the row-position column are both predicate columns; none is output-only.
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->predicate_columns = {field_projection(0),
+                                  field_projection(format::ROW_POSITION_COLUMN_ID)};
+    request->non_predicate_columns = {};
+    request->local_positions = {
+            {format::LocalColumnId(0), format::LocalIndex(0)},
+            {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+    };
+    request->conjuncts.push_back(create_int32_greater_than_conjunct(0, 2));
+    request->delete_conjuncts.push_back(VExprContext::create_shared(std::move(delete_predicate)));
+    ASSERT_TRUE(reader->open(request).ok());
+    // The single get_block() call is expected to return 2 rows without eof.
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_FALSE(eof);
+    ASSERT_EQ(rows, 2);
+    // Ids 3 and 5 (positions 2 and 4) are expected to pass both the conjunct and the delete.
+    const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+    const auto& row_position_column =
+            assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    EXPECT_EQ(id_column.get_element(1), 5);
+    EXPECT_EQ(row_position_column.get_element(0), 2);
+    EXPECT_EQ(row_position_column.get_element(1), 4);
+}
+
+TEST_F(NewParquetReaderTest, RowPositionReaderUsesFileLocalPositionsForScanRange) {
+    write_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    // Each row group is scanned by its own reader over a range from row_group_mid_range().
+    const std::vector<std::vector<int32_t>> expected_ids = {{1, 2}, {3, 4}, {5}};
+    const std::vector<std::vector<int64_t>> expected_row_positions = {{0, 1}, {2, 3}, {4}};
+    for (int row_group_idx = 0; row_group_idx < 3; ++row_group_idx) {
+        const auto [range_start_offset, range_size] =
+                row_group_mid_range(_file_path, row_group_idx);
+        auto reader = create_reader(range_start_offset, range_size);
+        RuntimeState state {TQueryOptions(), TQueryGlobals()};
+        ASSERT_TRUE(reader->init(&state).ok());
+        // Map column 0 to block position 0 and the row-position column to block position 2.
+        std::vector<format::ColumnDefinition> schema;
+        ASSERT_TRUE(reader->get_schema(&schema).ok());
+        auto request = std::make_shared<format::FileScanRequest>();
+        request->non_predicate_columns = {field_projection(format::ROW_POSITION_COLUMN_ID),
+                                          field_projection(0)};
+        request->local_positions = {
+                {format::LocalColumnId(0), format::LocalIndex(0)},
+                {format::LocalColumnId(format::ROW_POSITION_COLUMN_ID), format::LocalIndex(2)},
+        };
+        ASSERT_TRUE(reader->open(request).ok());
+        // Drain this reader, collecting ids and row positions across blocks.
+        std::vector<int32_t> ids;
+        std::vector<int64_t> row_positions;
+        bool eof = false;
+        while (!eof) {
+            Block block = build_file_block_with_row_position(schema);
+            size_t rows = 0;
+            ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+            if (rows == 0) {
+                continue;
+            }
+            const auto& id_column = nullable_nested_column<ColumnInt32>(block, 0);
+            const auto& row_position_column =
+                    assert_cast<const ColumnInt64&>(*block.get_by_position(2).column);
+            for (size_t row = 0; row < rows; ++row) {
+                ids.push_back(id_column.get_element(row));
+                row_positions.push_back(row_position_column.get_element(row));
+            }
+        }
+        // Expected positions are file-wide (e.g. {2, 3} for group 1), not restarted per range.
+        EXPECT_EQ(ids, expected_ids[row_group_idx]);
+        EXPECT_EQ(row_positions, expected_row_positions[row_group_idx]);
+    }
+}
+
+} // namespace
+} // namespace doris
diff --git a/be/test/format_v2/parquet/parquet_scan_test.cpp b/be/test/format_v2/parquet/parquet_scan_test.cpp
new file mode 100644
index 00000000000000..3b381c3158fd45
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_scan_test.cpp
@@ -0,0 +1,804 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_scan.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <cstring>
+#include <filesystem>
+#include <memory>
+#include <numeric>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/config.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/parquet_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "gen_cpp/Types_types.h"
+#include "io/io_common.h"
+#include "runtime/runtime_state.h"
+#include "storage/predicate/predicate_creator.h"
+#include "storage/utils.h"
+
+namespace doris {
+namespace {
+
+format::LocalColumnIndex field_projection(int32_t column_id) {
+    return {.index = column_id}; // only `index` is designated; the return type names the struct
+}
+
+const ColumnInt32& int32_data_column(const IColumn& column) {
+    // Looks through a ColumnNullable wrapper (if any) and views the payload as ColumnInt32.
+    const auto* wrapper = check_and_get_column<ColumnNullable>(&column);
+    const IColumn& payload = wrapper != nullptr ? wrapper->get_nested_column() : column;
+    return assert_cast<const ColumnInt32&>(payload);
+}
+
+const ColumnString& string_data_column(const IColumn& column) {
+    // Looks through a ColumnNullable wrapper (if any) and views the payload as ColumnString.
+    const auto* wrapper = check_and_get_column<ColumnNullable>(&column);
+    const IColumn& payload = wrapper != nullptr ? wrapper->get_nested_column() : column;
+    return assert_cast<const ColumnString&>(payload);
+}
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array; // receives the finished array from `builder`
+    EXPECT_TRUE(builder->Finish(&array).ok()); // non-fatal: a failed Finish is only recorded
+    return array;
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder int_builder; // every element of `values` is appended in order
+    for (size_t idx = 0; idx < values.size(); ++idx) {
+        EXPECT_TRUE(int_builder.Append(values[idx]).ok());
+    }
+    return finish_array(&int_builder);
+}
+
+std::shared_ptr<arrow::Array> build_struct_array(const std::vector<int32_t>& ids,
+                                                 const std::vector<std::string>& names) {
+    // Builds a struct<id:int32, name:utf8> array with one row per entry of `ids`/`names`.
+    auto struct_type = arrow::struct_({arrow::field("id", arrow::int32(), false),
+                                       arrow::field("name", arrow::utf8(), false)});
+    // make_shared replaces the make_unique().release() round trip through a raw pointer.
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> field_builders;
+    field_builders.push_back(std::make_shared<arrow::Int32Builder>());
+    field_builders.push_back(std::make_shared<arrow::StringBuilder>());
+    arrow::StructBuilder builder(struct_type, arrow::default_memory_pool(),
+                                 std::move(field_builders));
+    auto* id_builder = assert_cast<arrow::Int32Builder*>(builder.field_builder(0));
+    auto* name_builder = assert_cast<arrow::StringBuilder*>(builder.field_builder(1));
+    for (size_t row = 0; row < ids.size(); ++row) {
+        EXPECT_TRUE(builder.Append().ok());
+        EXPECT_TRUE(id_builder->Append(ids[row]).ok());
+        EXPECT_TRUE(name_builder->Append(names[row]).ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_list_array() {
+    auto value_builder = std::make_unique<arrow::Int32Builder>();
+    arrow::ListBuilder builder(arrow::default_memory_pool(), std::move(value_builder));
+    auto* int_builder = assert_cast<arrow::Int32Builder*>(builder.value_builder());
+    EXPECT_TRUE(builder.Append().ok()); // row 0: [1, 2]
+    EXPECT_TRUE(int_builder->Append(1).ok());
+    EXPECT_TRUE(int_builder->Append(2).ok());
+    EXPECT_TRUE(builder.Append().ok()); // row 1: [3]
+    EXPECT_TRUE(int_builder->Append(3).ok());
+    EXPECT_TRUE(builder.Append().ok()); // row 2: no values appended, i.e. an empty list
+    return finish_array(&builder);
+}
+
+void write_table(const std::string& file_path, const std::shared_ptr<arrow::Table>& table,
+                 int64_t row_group_size, bool enable_dictionary = false,
+                 bool enable_page_index = false, bool enable_statistics = true) {
+    auto file_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(file_result.ok()) << file_result.status(); // fatal: returns from this helper
+    std::shared_ptr<arrow::io::FileOutputStream> out = *file_result;
+    // Fixed settings: Parquet 2.6, V2 data pages, uncompressed. The flags toggle the rest.
+    ::parquet::WriterProperties::Builder builder;
+    builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+    builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    builder.compression(::parquet::Compression::UNCOMPRESSED);
+    if (enable_dictionary) {
+        builder.enable_dictionary();
+    } else {
+        builder.disable_dictionary();
+    }
+    if (enable_page_index) {
+        builder.enable_write_page_index();
+        builder.write_batch_size(8);
+        builder.data_pagesize(10); // NOTE(review): tiny page size, presumably to get many pages
+    }
+    if (!enable_statistics) {
+        builder.disable_statistics();
+    }
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      row_group_size, builder.build()));
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, int64_t row_group_size = 2,
+                                 bool enable_statistics = true) {
+    // Six rows of (id, score) with score == id * 10; dictionary and page index stay disabled.
+    auto id_field = arrow::field("id", arrow::int32(), false);
+    auto score_field = arrow::field("score", arrow::int32(), false);
+    auto ids = build_int32_array({1, 2, 3, 4, 5, 6});
+    auto scores = build_int32_array({10, 20, 30, 40, 50, 60});
+    auto table = arrow::Table::Make(arrow::schema({id_field, score_field}), {ids, scores});
+    write_table(file_path, table, row_group_size, false, false, enable_statistics);
+}
+
+void write_struct_parquet_file(const std::string& file_path) {
+    // One non-null struct column "s" with non-null members id:int32 and name:utf8; the four
+    // rows are written with row_group_size 2.
+    auto id_field = arrow::field("id", arrow::int32(), false);
+    auto name_field = arrow::field("name", arrow::utf8(), false);
+    auto struct_field = arrow::field("s", arrow::struct_({id_field, name_field}), false);
+    auto struct_array = build_struct_array({1, 2, 10, 11}, {"one", "two", "ten", "eleven"});
+    auto table = arrow::Table::Make(arrow::schema({struct_field}), {struct_array});
+    write_table(file_path, table, 2);
+}
+
+void write_list_parquet_file(const std::string& file_path) {
+    // Single non-null list<int32> column "xs", written with row_group_size 2.
+    auto list_field = arrow::field("xs", arrow::list(arrow::int32()), false);
+    auto list_array = build_list_array();
+    auto table = arrow::Table::Make(arrow::schema({list_field}), {list_array});
+    write_table(file_path, table, 2);
+}
+
+void write_page_index_parquet_file(const std::string& file_path) {
+    // Ids 0..127 in one column; row_group_size equals the row count and page index is enabled.
+    std::vector<int32_t> values(128);
+    std::iota(values.begin(), values.end(), 0);
+    auto id_field = arrow::field("id", arrow::int32(), false);
+    auto id_array = build_int32_array(values);
+    auto table = arrow::Table::Make(arrow::schema({id_field}), {id_array});
+    write_table(file_path, table, values.size(), false, true);
+}
+
+void write_page_index_pair_parquet_file(const std::string& file_path) {
+    // Two int32 columns (id, score) that both hold 0..127; page index is enabled.
+    std::vector<int32_t> values(128);
+    std::iota(values.begin(), values.end(), 0);
+    auto id_field = arrow::field("id", arrow::int32(), false);
+    auto score_field = arrow::field("score", arrow::int32(), false);
+    auto table = arrow::Table::Make(arrow::schema({id_field, score_field}),
+                                    {build_int32_array(values), build_int32_array(values)});
+    write_table(file_path, table, values.size(), false, true);
+}
+
+int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    // A dictionary page, when present, supplies the offset; otherwise the data page does.
+    const bool has_dict = column_metadata.has_dictionary_page();
+    return has_dict ? column_metadata.dictionary_page_offset() : column_metadata.data_page_offset();
+}
+
+std::pair<int64_t, int64_t> row_group_mid_range(const std::string& file_path, int row_group_idx) {
+    // Returns {offset, 1}: a 1-byte scan range at the byte midpoint of row group
+    // `row_group_idx`. The row group is taken to span from the start of its first column
+    // chunk to the start of its last column chunk plus that chunk's total_compressed_size().
+    auto file_reader = ::parquet::ParquetFileReader::OpenFile(file_path, false);
+    auto group = file_reader->metadata()->RowGroup(row_group_idx);
+    auto head_chunk = group->ColumnChunk(0);
+    auto tail_chunk = group->ColumnChunk(group->num_columns() - 1);
+    const int64_t begin = parquet_column_start_offset(*head_chunk);
+    const int64_t end =
+            parquet_column_start_offset(*tail_chunk) + tail_chunk->total_compressed_size();
+    return {begin + (end - begin) / 2, 1};
+}
+
+Block build_file_block(const std::vector<format::ColumnDefinition>& schema) {
+    Block out; // receives one freshly created column per schema entry, in schema order
+    for (size_t idx = 0; idx < schema.size(); ++idx) {
+        out.insert({schema[idx].type->create_column(), schema[idx].type, schema[idx].name});
+    }
+    return out;
+}
+
+GlobalRowLoacationV2 decode_rowid(const ColumnString& column, size_t row) {
+    const auto ref = column.get_data_at(row); // raw bytes of one encoded GlobalRowLoacationV2
+    GlobalRowLoacationV2 location(0, 0, 0, 0);
+    EXPECT_EQ(ref.size, sizeof(location)); // non-fatal, hence the clamped copy below
+    std::memcpy(&location, ref.data, ref.size < sizeof(location) ? ref.size : sizeof(location));
+    return location;
+}
+
+void use_schema_order_positions(format::FileScanRequest* request,
+                                const std::vector<format::ColumnDefinition>& schema) {
+    DORIS_CHECK(request != nullptr); // `request` is dereferenced unconditionally below
+    for (size_t idx = 0; idx < schema.size(); ++idx) { // schema[idx].local_id -> LocalIndex(idx)
+        request->local_positions.emplace(format::LocalColumnId(schema[idx].local_id),
+                                         format::LocalIndex(idx));
+    }
+}
+
+std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> build_file_schema(
+        const ::parquet::ParquetFileReader& reader) {
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> file_schema;
+    const auto* descriptor = reader.metadata()->schema();
+    // EXPECT_* is non-fatal: short-circuit so a null descriptor is reported, not dereferenced.
+    EXPECT_TRUE(descriptor != nullptr &&
+                format::parquet::build_parquet_column_schema(*descriptor, &file_schema).ok());
+    return file_schema;
+}
+
+format::FileColumnPredicateFilter int32_filter(int32_t column_id, std::string column_name,
+                                               const DataTypePtr& type,
+                                               PredicateType predicate_type, int32_t value) {
+    // Builds a filter on `column_id` holding a single `<column> <op> value` comparison.
+    // Only GE, GT and LT are handled; any other PredicateType trips DORIS_CHECK(false).
+    // The comparison template needs a compile-time PredicateType, hence one branch per op.
+    format::FileColumnPredicateFilter filter;
+    filter.file_column_id = format::LocalColumnId(column_id);
+    auto& predicates = filter.predicates;
+    if (predicate_type == PredicateType::GE) {
+        predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+                column_id, column_name, type, Field::create_field<TYPE_INT>(value), false));
+    } else if (predicate_type == PredicateType::GT) {
+        predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+                column_id, column_name, type, Field::create_field<TYPE_INT>(value), false));
+    } else if (predicate_type == PredicateType::LT) {
+        predicates.push_back(create_comparison_predicate<PredicateType::LT>(
+                column_id, column_name, type, Field::create_field<TYPE_INT>(value), false));
+    } else {
+        DORIS_CHECK(false);
+    }
+    return filter;
+}
+
+int64_t count_range_rows(const std::vector<format::parquet::RowRange>& ranges) {
+    // Total number of rows covered by `ranges`, i.e. the sum of every range's length.
+    const auto add_length = [](int64_t total, const format::parquet::RowRange& range) {
+        return total + range.length;
+    };
+    return std::accumulate(ranges.begin(), ranges.end(), int64_t {0}, add_length);
+}
+
+class ParquetScanTest : public testing::Test { // fixture owning a per-test scratch parquet path
+protected:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_format_v2_parquet_scan_test";
+        std::filesystem::remove_all(_test_dir); // start from an empty scratch directory
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "scan.parquet").string();
+    }
+    // Removes the scratch directory together with whatever the test wrote into it.
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+    // Creates a ParquetReader over _file_path as a local file; the size is read from disk.
+    std::unique_ptr<format::parquet::ParquetReader> create_reader(
+            int64_t range_start_offset = 0, int64_t range_size = -1,
+            RuntimeProfile* profile = nullptr,
+            std::optional<format::GlobalRowIdContext> global_rowid_context = std::nullopt) const {
+        auto system_properties = std::make_shared<io::FileSystemProperties>();
+        system_properties->system_type = TFileType::FILE_LOCAL;
+        auto file_description = std::make_unique<io::FileDescription>();
+        file_description->path = _file_path;
+        file_description->file_size = static_cast<int64_t>(std::filesystem::file_size(_file_path));
+        file_description->range_start_offset = range_start_offset;
+        file_description->range_size = range_size;
+        return std::make_unique<format::parquet::ParquetReader>(
+                system_properties, file_description, nullptr, profile, global_rowid_context);
+    }
+    // Opens `reader` with a default-constructed FileScanRequest and returns that request.
+    std::shared_ptr<format::FileScanRequest> open_all_row_groups(
+            format::parquet::ParquetReader* reader) {
+        auto request = std::make_shared<format::FileScanRequest>();
+        EXPECT_TRUE(reader->open(request).ok());
+        return request;
+    }
+    // Scratch directory for the current test and the parquet file path inside it.
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+};
+
+TEST_F(ParquetScanTest, PlanRowGroupsAppliesScanRangeBeforeStatistics) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+    // Ids 1..6 are written two per row group; the column predicate asks for id >= 5.
+    format::FileScanRequest request;
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 5));
+    // Restrict the scan to a 1-byte range at the midpoint of row group 1 (ids 3 and 4).
+    const auto [range_start_offset, range_size] = row_group_mid_range(_file_path, 1);
+    format::parquet::ParquetScanRange scan_range;
+    scan_range.start_offset = range_start_offset;
+    scan_range.size = range_size;
+    scan_range.file_size = static_cast<int64_t>(std::filesystem::file_size(_file_path));
+    // Expect no selected row group, with exactly one group (2 rows) counted as stats-filtered.
+    format::parquet::RowGroupScanPlan plan;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    EXPECT_TRUE(plan.row_groups.empty());
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 3);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 0);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 1);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 2);
+}
+
+TEST_F(ParquetScanTest, PlanRowGroupsPreservesFirstFileRowAcrossPrunedRowGroups) {
+    write_int_pair_parquet_file(_file_path, 2); // Asserted below to yield 3 row groups.
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request;
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 5)); // id >= 5
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range; // Default-constructed scan range.
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    ASSERT_EQ(plan.row_groups.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].row_group_id, 2); // Only the last row group is kept.
+    EXPECT_EQ(plan.row_groups[0].first_file_row, 4); // Counts rows of the pruned groups.
+    EXPECT_EQ(plan.row_groups[0].row_group_rows, 2);
+    ASSERT_EQ(plan.row_groups[0].selected_ranges.size(), 1);
+    EXPECT_EQ(plan.row_groups[0].selected_ranges[0].start, 0);
+    EXPECT_EQ(plan.row_groups[0].selected_ranges[0].length, 2);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 2);
+    EXPECT_EQ(plan.pruning_stats.filtered_group_rows, 4); // 2 pruned groups x 2 rows.
+}
+
+TEST_F(ParquetScanTest, PlanRowGroupsSelectsAllRowGroupsWithoutFilters) {
+    write_int_pair_parquet_file(_file_path, 2); // Asserted below to yield 3 row groups.
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request; // No predicates are added to this request.
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+
+    ASSERT_EQ(plan.row_groups.size(), 3);
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 3);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 3);
+    for (size_t row_group_idx = 0; row_group_idx < plan.row_groups.size(); ++row_group_idx) {
+        EXPECT_EQ(plan.row_groups[row_group_idx].row_group_id, row_group_idx);
+        EXPECT_EQ(plan.row_groups[row_group_idx].first_file_row,
+                  static_cast<int64_t>(row_group_idx * 2)); // 2 rows per row group.
+        ASSERT_EQ(plan.row_groups[row_group_idx].selected_ranges.size(), 1);
+        EXPECT_EQ(plan.row_groups[row_group_idx].selected_ranges[0].start, 0);
+        EXPECT_EQ(plan.row_groups[row_group_idx].selected_ranges[0].length, 2);
+        EXPECT_TRUE(plan.row_groups[row_group_idx].page_skip_plans.empty());
+    }
+}
+
+TEST_F(ParquetScanTest, PageIndexIntersectsMultipleFiltersAndBuildsSkipPlan) {
+    write_page_index_pair_parquet_file(_file_path); // Asserted below: 1 row group.
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest single_filter_request; // Baseline: one filter only.
+    single_filter_request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32)); // id >= 32
+    format::parquet::RowGroupScanPlan single_filter_plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(
+                        *parquet_file_reader->metadata(), parquet_file_reader.get(), file_schema,
+                        single_filter_request, scan_range, false, &single_filter_plan)
+                        .ok());
+    ASSERT_EQ(single_filter_plan.row_groups.size(), 1);
+    const int64_t single_filter_rows =
+            count_range_rows(single_filter_plan.row_groups[0].selected_ranges);
+
+    format::FileScanRequest intersect_request; // Same filter plus one on column 1.
+    intersect_request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32)); // id >= 32
+    intersect_request.column_predicate_filters.push_back(
+            int32_filter(1, "score", file_schema[1]->type, PredicateType::LT, 96)); // score < 96
+    format::parquet::RowGroupScanPlan intersect_plan;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(
+                        *parquet_file_reader->metadata(), parquet_file_reader.get(), file_schema,
+                        intersect_request, scan_range, false, &intersect_plan)
+                        .ok());
+    ASSERT_EQ(intersect_plan.row_groups.size(), 1);
+    ASSERT_FALSE(intersect_plan.row_groups[0].selected_ranges.empty());
+    const int64_t intersect_rows = count_range_rows(intersect_plan.row_groups[0].selected_ranges);
+    EXPECT_GT(single_filter_rows, intersect_rows); // Second filter must narrow the result.
+    EXPECT_GT(intersect_plan.row_groups[0].selected_ranges.front().start, 0);
+    const auto& last_range = intersect_plan.row_groups[0].selected_ranges.back();
+    EXPECT_LT(last_range.start + last_range.length, 128); // Selection ends before row 128.
+    EXPECT_GT(intersect_plan.pruning_stats.filtered_page_rows, 0);
+    EXPECT_EQ(intersect_plan.pruning_stats.selected_row_ranges,
+              intersect_plan.row_groups[0].selected_ranges.size());
+
+    auto id_skip_plan = intersect_plan.row_groups[0].page_skip_plans.find(0); // Keyed by 0.
+    ASSERT_NE(id_skip_plan, intersect_plan.row_groups[0].page_skip_plans.end());
+    EXPECT_EQ(id_skip_plan->second.leaf_column_id, 0);
+    EXPECT_FALSE(id_skip_plan->second.empty());
+    auto score_skip_plan = intersect_plan.row_groups[0].page_skip_plans.find(1);
+    ASSERT_NE(score_skip_plan, intersect_plan.row_groups[0].page_skip_plans.end());
+    EXPECT_EQ(score_skip_plan->second.leaf_column_id, 1);
+    EXPECT_FALSE(score_skip_plan->second.empty());
+}
+
+TEST_F(ParquetScanTest, PageIndexCanFullyFilterRowGroupAfterRangeIntersection) {
+    write_page_index_parquet_file(_file_path); // Asserted below: 1 row group.
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 1);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request; // Two filters on "id" that no row can satisfy together.
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GE, 32)); // id >= 32
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::LT, 32)); // id < 32
+
+    format::parquet::RowGroupScanPlan plan;
+    format::parquet::ParquetScanRange scan_range;
+    ASSERT_TRUE(format::parquet::plan_parquet_row_groups(*parquet_file_reader->metadata(),
+                                                         parquet_file_reader.get(), file_schema,
+                                                         request, scan_range, false, &plan)
+                        .ok());
+    EXPECT_TRUE(plan.row_groups.empty()); // Contradictory filters select no rows.
+    EXPECT_EQ(plan.pruning_stats.total_row_groups, 1);
+    EXPECT_EQ(plan.pruning_stats.selected_row_groups, 0);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_statistics, 0);
+    EXPECT_EQ(plan.pruning_stats.filtered_row_groups_by_page_index, 1); // Dropped here instead.
+    EXPECT_EQ(plan.pruning_stats.filtered_page_rows, 128);
+}
+
+TEST_F(ParquetScanTest, PageIndexFullRangeWhenDisabledOrUnavailable) {
+    write_page_index_parquet_file(_file_path);
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    auto file_schema = build_file_schema(*parquet_file_reader);
+
+    format::FileScanRequest request;
+    request.column_predicate_filters.push_back(
+            int32_filter(0, "id", file_schema[0]->type, PredicateType::GT, 63)); // id > 63
+
+    const bool old_enable_page_index = config::enable_parquet_page_index;
+    config::enable_parquet_page_index = false; // Case 1: page index disabled via config.
+    std::vector<format::parquet::RowRange> selected_ranges;
+    std::map<int, format::parquet::ParquetPageSkipPlan> page_skip_plans;
+    format::parquet::ParquetPruningStats pruning_stats;
+    const auto disabled_status = format::parquet::select_row_group_ranges_by_page_index(
+            parquet_file_reader.get(), file_schema, request, 0, 128, &selected_ranges,
+            &page_skip_plans, &pruning_stats);
+    config::enable_parquet_page_index = old_enable_page_index; // Restore before fatal asserts.
+    ASSERT_TRUE(disabled_status.ok()); // A failure here can no longer leak the override.
+    ASSERT_EQ(selected_ranges.size(), 1);
+    EXPECT_EQ(selected_ranges[0].start, 0);
+    EXPECT_EQ(selected_ranges[0].length, 128); // Full range: nothing was pruned.
+    EXPECT_TRUE(page_skip_plans.empty());
+    EXPECT_EQ(pruning_stats.page_index_read_calls, 0); // Index never consulted.
+
+    write_int_pair_parquet_file(_file_path, 6); // Case 2: presumably has no page index.
+    auto no_index_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    auto no_index_schema = build_file_schema(*no_index_reader);
+    format::FileScanRequest no_index_request;
+    no_index_request.column_predicate_filters.push_back(
+            int32_filter(0, "id", no_index_schema[0]->type, PredicateType::GT, 3)); // id > 3
+    selected_ranges.clear(); // Reset outputs shared with case 1.
+    page_skip_plans.clear();
+    pruning_stats = {};
+    ASSERT_TRUE(format::parquet::select_row_group_ranges_by_page_index(
+                        no_index_reader.get(), no_index_schema, no_index_request, 0, 6,
+                        &selected_ranges, &page_skip_plans, &pruning_stats)
+                        .ok());
+    ASSERT_EQ(selected_ranges.size(), 1);
+    EXPECT_EQ(selected_ranges[0].start, 0);
+    EXPECT_EQ(selected_ranges[0].length, 6); // Full range again.
+    EXPECT_TRUE(page_skip_plans.empty());
+}
+
+TEST_F(ParquetScanTest, AggregateCountAndMinMaxUseAllSelectedRowGroups) {
+    write_int_pair_parquet_file(_file_path); // Helper defaults; count asserted as 6 below.
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    open_all_row_groups(reader.get()); // Opens with a default request.
+
+    format::FileAggregateResult count_result;
+    format::FileAggregateRequest count_request;
+    count_request.agg_type = TPushAggOp::COUNT;
+    ASSERT_TRUE(reader->get_aggregate_result(count_request, &count_result).ok());
+    EXPECT_EQ(count_result.count, 6);
+    EXPECT_TRUE(count_result.columns.empty()); // COUNT returns no per-column entries.
+
+    format::FileAggregateResult minmax_result;
+    format::FileAggregateRequest minmax_request;
+    minmax_request.agg_type = TPushAggOp::MINMAX;
+    minmax_request.columns.push_back({.projection = field_projection(0)});
+    minmax_request.columns.push_back({.projection = field_projection(1)});
+    ASSERT_TRUE(reader->get_aggregate_result(minmax_request, &minmax_result).ok());
+    EXPECT_EQ(minmax_result.count, 6); // MINMAX also reports the row count.
+    ASSERT_EQ(minmax_result.columns.size(), 2); // One entry per requested column.
+    EXPECT_TRUE(minmax_result.columns[0].has_min);
+    EXPECT_TRUE(minmax_result.columns[0].has_max);
+    EXPECT_EQ(minmax_result.columns[0].min_value.get<TYPE_INT>(), 1);
+    EXPECT_EQ(minmax_result.columns[0].max_value.get<TYPE_INT>(), 6);
+    EXPECT_EQ(minmax_result.columns[1].min_value.get<TYPE_INT>(), 10);
+    EXPECT_EQ(minmax_result.columns[1].max_value.get<TYPE_INT>(), 60);
+}
+
+TEST_F(ParquetScanTest, AggregateRespectsStatisticsPrunedRowGroups) {
+    write_int_pair_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(5), false)); // id >= 5
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok()); // Filter is attached before open().
+
+    format::FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::MINMAX;
+    aggregate_request.columns.push_back({.projection = field_projection(0)});
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok());
+    EXPECT_EQ(result.count, 2); // Only rows from row groups that were not pruned.
+    ASSERT_EQ(result.columns.size(), 1);
+    EXPECT_EQ(result.columns[0].min_value.get<TYPE_INT>(), 5);
+    EXPECT_EQ(result.columns[0].max_value.get<TYPE_INT>(), 6);
+}
+
+TEST_F(ParquetScanTest, AggregateCountKeepsRowGroupRowsAfterPageIndexPruning) {
+    write_page_index_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(63), false)); // id > 63
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    format::FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::COUNT;
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok());
+    EXPECT_EQ(result.count, 128); // Whole row group, not a page-pruned subset.
+}
+
+TEST_F(ParquetScanTest, AggregateMinMaxSupportsNestedSingleLeafProjection) {
+    write_struct_parquet_file(_file_path);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+    open_all_row_groups(reader.get()); // Opens with a default request.
+
+    format::LocalColumnIndex nested_id = format::LocalColumnIndex::partial_local(0);
+    nested_id.children.push_back(field_projection(0)); // presumably child 0 of column 0
+    format::FileAggregateRequest aggregate_request;
+    aggregate_request.agg_type = TPushAggOp::MINMAX;
+    aggregate_request.columns.push_back({.projection = nested_id});
+    format::FileAggregateResult result;
+    ASSERT_TRUE(reader->get_aggregate_result(aggregate_request, &result).ok());
+    EXPECT_EQ(result.count, 4);
+    ASSERT_EQ(result.columns.size(), 1); // One entry for the single nested projection.
+    EXPECT_EQ(result.columns[0].min_value.get<TYPE_INT>(), 1);
+    EXPECT_EQ(result.columns[0].max_value.get<TYPE_INT>(), 11);
+}
+
+TEST_F(ParquetScanTest, AggregateRejectsRepeatedMissingStatisticsAndInvalidRequests) {
+    write_list_parquet_file(_file_path); // Case 1: MINMAX on a list file's column 0.
+    auto repeated_reader = create_reader();
+    RuntimeState repeated_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(repeated_reader->init(&repeated_state).ok());
+    open_all_row_groups(repeated_reader.get());
+
+    format::FileAggregateRequest repeated_request;
+    repeated_request.agg_type = TPushAggOp::MINMAX;
+    repeated_request.columns.push_back({.projection = field_projection(0)});
+    format::FileAggregateResult repeated_result;
+    EXPECT_FALSE(repeated_reader->get_aggregate_result(repeated_request, &repeated_result).ok());
+
+    write_int_pair_parquet_file(_file_path, 2, false); // Case 2: presumably written w/o stats.
+    auto no_stats_reader = create_reader();
+    RuntimeState no_stats_state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(no_stats_reader->init(&no_stats_state).ok());
+    open_all_row_groups(no_stats_reader.get());
+    format::FileAggregateRequest no_stats_request;
+    no_stats_request.agg_type = TPushAggOp::MINMAX;
+    no_stats_request.columns.push_back({.projection = field_projection(0)});
+    format::FileAggregateResult no_stats_result;
+    EXPECT_FALSE(no_stats_reader->get_aggregate_result(no_stats_request, &no_stats_result).ok());
+
+    format::FileAggregateRequest invalid_type_request; // Case 3: reuses the case-2 reader.
+    invalid_type_request.agg_type = TPushAggOp::MIX; // Expected to be rejected below.
+    format::FileAggregateResult invalid_type_result;
+    EXPECT_FALSE(
+            no_stats_reader->get_aggregate_result(invalid_type_request, &invalid_type_result).ok());
+
+    format::FileAggregateRequest invalid_column_request; // Case 4: reuses the case-2 reader.
+    invalid_column_request.agg_type = TPushAggOp::MINMAX; // Valid type, bad column below.
+    invalid_column_request.columns.push_back({.projection = field_projection(100)});
+    format::FileAggregateResult invalid_column_result;
+    EXPECT_FALSE(
+            no_stats_reader->get_aggregate_result(invalid_column_request, &invalid_column_result)
+                    .ok());
+}
+
+TEST_F(ParquetScanTest, GlobalRowIdUsesFileLocalPositionForScanRange) {
+    write_int_pair_parquet_file(_file_path, 2); // Asserted below to yield 3 row groups.
+    auto parquet_file_reader = ::parquet::ParquetFileReader::OpenFile(_file_path, false);
+    ASSERT_EQ(parquet_file_reader->metadata()->num_row_groups(), 3);
+    const auto [range_start_offset, range_size] = row_group_mid_range(_file_path, 1);
+    format::GlobalRowIdContext context {.version = 7, .backend_id = 123456789, .file_id = 42};
+    auto reader = create_reader(range_start_offset, range_size, nullptr, context);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3); // Position 2 is read as the row id column below.
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0),
+                                      field_projection(format::GLOBAL_ROWID_COLUMN_ID)};
+    use_schema_order_positions(request.get(), schema);
+    ASSERT_TRUE(reader->open(request).ok());
+
+    std::vector<int32_t> ids;
+    std::vector<uint32_t> row_ids;
+    bool eof = false;
+    while (!eof) { // Drain the reader batch by batch.
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        if (rows == 0) { // Empty batch: keep polling until eof.
+            continue;
+        }
+        const auto& id_column = int32_data_column(*block.get_by_position(0).column);
+        const auto& rowid_column = string_data_column(*block.get_by_position(2).column);
+        for (size_t row = 0; row < rows; ++row) {
+            ids.push_back(id_column.get_element(row));
+            const auto location = decode_rowid(rowid_column, row);
+            EXPECT_EQ(location.version, context.version); // Context fields round-trip.
+            EXPECT_EQ(location.backend_id, context.backend_id);
+            EXPECT_EQ(location.file_id, context.file_id);
+            row_ids.push_back(location.row_id);
+        }
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({3, 4}));
+    EXPECT_EQ(row_ids, std::vector<uint32_t>({2, 3})); // File positions, not range-relative.
+}
+
+TEST_F(ParquetScanTest, EmptyScanPlanReturnsEofWithoutReadingColumns) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(100), false)); // id >= 100
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    Block block = build_file_block(schema);
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok()); // Single call, no loop.
+    EXPECT_EQ(rows, 0);
+    EXPECT_TRUE(eof); // The first call already reports end of file.
+}
+
+TEST_F(ParquetScanTest, NoRequestedColumnsReturnsRowsOnlyAcrossRowGroups) {
+    write_int_pair_parquet_file(_file_path, 2);
+    auto reader = create_reader();
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    auto request = std::make_shared<format::FileScanRequest>(); // No columns requested.
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t total_rows = 0;
+    bool eof = false;
+    while (!eof) { // Drain the reader batch by batch.
+        Block block;
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        EXPECT_EQ(block.columns(), 0); // Block stays empty; only `rows` is reported.
+        total_rows += rows;
+    }
+    EXPECT_EQ(total_rows, 6);
+}
+
+TEST_F(ParquetScanTest, ProfileCountersReflectPageIndexAndRangeGapPruning) {
+    write_page_index_parquet_file(_file_path);
+    RuntimeProfile profile("profile"); // Handed to the reader; counters are checked below.
+    auto reader = create_reader(0, -1, &profile);
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    ASSERT_TRUE(reader->init(&state).ok());
+
+    std::vector<format::ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<format::FileScanRequest>();
+    request->non_predicate_columns = {field_projection(0)};
+    use_schema_order_positions(request.get(), schema);
+    format::FileColumnPredicateFilter column_filter;
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "id", schema[0].type, Field::create_field<TYPE_INT>(63), false)); // id > 63
+    request->column_predicate_filters.push_back(std::move(column_filter));
+    ASSERT_TRUE(reader->open(request).ok());
+
+    size_t total_rows = 0;
+    bool eof = false;
+    while (!eof) { // Drain the reader so the counters reflect the full scan.
+        Block block = build_file_block(schema);
+        size_t rows = 0;
+        ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+        total_rows += rows;
+    }
+
+    EXPECT_EQ(total_rows, 64);
+    ASSERT_NE(profile.get_counter("RowGroupsTotalNum"), nullptr); // Guard the derefs below.
+    ASSERT_NE(profile.get_counter("RowGroupsReadNum"), nullptr);
+    ASSERT_NE(profile.get_counter("FilteredRowsByPage"), nullptr);
+    ASSERT_NE(profile.get_counter("SelectedRowRanges"), nullptr);
+    ASSERT_NE(profile.get_counter("PageIndexReadCalls"), nullptr);
+    ASSERT_NE(profile.get_counter("RawRowsRead"), nullptr);
+    ASSERT_NE(profile.get_counter("RangeGapSkippedRows"), nullptr);
+    EXPECT_EQ(profile.get_counter("RowGroupsTotalNum")->value(), 1);
+    EXPECT_EQ(profile.get_counter("RowGroupsReadNum")->value(), 1);
+    EXPECT_GT(profile.get_counter("FilteredRowsByPage")->value(), 0);
+    EXPECT_GT(profile.get_counter("SelectedRowRanges")->value(), 0);
+    EXPECT_GT(profile.get_counter("PageIndexReadCalls")->value(), 0);
+    EXPECT_EQ(profile.get_counter("RawRowsRead")->value(), 64); // Matches total_rows above.
+    EXPECT_GT(profile.get_counter("RangeGapSkippedRows")->value(), 0);
+}
+
+} // namespace
+} // namespace doris
diff --git a/be/test/format_v2/parquet/parquet_schema_test.cpp b/be/test/format_v2/parquet/parquet_schema_test.cpp
new file mode 100644
index 00000000000000..e620ed718efbf2
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_schema_test.cpp
@@ -0,0 +1,527 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <parquet/api/schema.h>
+
+#include <string>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/data_type/primitive_type.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+
+namespace doris::format::parquet {
+namespace {
+
+std::vector<std::unique_ptr<ParquetColumnSchema>> build_fields(
+        const std::vector<::parquet::schema::NodePtr>& nodes) {
+    auto schema = // Wrap the given nodes in a REQUIRED root group named "schema".
+            ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, nodes);
+    ::parquet::SchemaDescriptor descriptor;
+    descriptor.Init(schema);
+    std::vector<std::unique_ptr<ParquetColumnSchema>> fields;
+    EXPECT_TRUE(build_parquet_column_schema(descriptor, &fields).ok()); // Non-fatal check.
+    return fields; // Returned even when the build failed.
+}
+
+Status build_status(const std::vector<::parquet::schema::NodePtr>& nodes) {
+    auto schema = // Wrap the given nodes in a REQUIRED root group named "schema".
+            ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED, nodes);
+    ::parquet::SchemaDescriptor descriptor;
+    descriptor.Init(schema);
+    std::vector<std::unique_ptr<ParquetColumnSchema>> fields; // Discarded on return.
+    return build_parquet_column_schema(descriptor, &fields); // Only the status is reported.
+}
+
+} // namespace
+
+TEST(ParquetSchemaTest, PrimitiveStateAndFieldIdArePreserved) {
+    const auto fields = build_fields({
+            ::parquet::schema::PrimitiveNode::Make("required_i32", ::parquet::Repetition::REQUIRED,
+                                                   ::parquet::Type::INT32),
+            ::parquet::schema::PrimitiveNode::Make("optional_i64", ::parquet::Repetition::OPTIONAL,
+                                                   ::parquet::Type::INT64,
+                                                   ::parquet::ConvertedType::NONE, -1, -1, -1, 42),
+    });
+
+    ASSERT_EQ(fields.size(), 2);
+    EXPECT_EQ(fields[0]->local_id, 0);
+    EXPECT_EQ(fields[0]->name, "required_i32");
+    EXPECT_EQ(fields[0]->kind, ParquetColumnSchemaKind::PRIMITIVE);
+    EXPECT_EQ(fields[0]->leaf_column_id, 0);
+    EXPECT_EQ(fields[0]->nullable_definition_level, 0); // REQUIRED field.
+    EXPECT_FALSE(fields[0]->type->is_nullable());
+
+    EXPECT_EQ(fields[1]->local_id, 1);
+    EXPECT_EQ(fields[1]->parquet_field_id, 42); // Last argument passed to Make() above.
+    EXPECT_EQ(fields[1]->leaf_column_id, 1);
+    EXPECT_EQ(fields[1]->nullable_definition_level, 1); // OPTIONAL field.
+    EXPECT_TRUE(fields[1]->type->is_nullable());
+}
+
+TEST(ParquetSchemaTest, PrimitiveTypeDescriptorCoversLogicalConvertedAndPhysicalFallback) {
+    const auto fields = build_fields({
+            ::parquet::schema::PrimitiveNode::Make(
+                    "ts", ::parquet::Repetition::OPTIONAL,
+                    ::parquet::LogicalType::Timestamp(false,
+                                                      ::parquet::LogicalType::TimeUnit::MICROS),
+                    ::parquet::Type::INT64),
+            ::parquet::schema::PrimitiveNode::Make("i8", ::parquet::Repetition::REQUIRED,
+                                                   ::parquet::Type::INT32,
+                                                   ::parquet::ConvertedType::INT_8),
+            ::parquet::schema::PrimitiveNode::Make("plain", ::parquet::Repetition::REQUIRED,
+                                                   ::parquet::Type::DOUBLE),
+    });
+
+    ASSERT_EQ(fields.size(), 3); // [0] logical type, [1] converted type, [2] physical only.
+    EXPECT_EQ(remove_nullable(fields[0]->type)->get_primitive_type(), TYPE_DATETIMEV2);
+    EXPECT_EQ(fields[0]->type_descriptor.time_unit, ParquetTimeUnit::MICROS);
+    EXPECT_EQ(fields[0]->type_descriptor.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS);
+    EXPECT_TRUE(fields[0]->type_descriptor.is_timestamp);
+    EXPECT_FALSE(fields[0]->type_descriptor.timestamp_is_adjusted_to_utc); // `false` above.
+
+    EXPECT_EQ(remove_nullable(fields[1]->type)->get_primitive_type(), TYPE_TINYINT);
+    EXPECT_EQ(fields[1]->type_descriptor.integer_bit_width, 8); // From ConvertedType::INT_8.
+    EXPECT_FALSE(fields[1]->type_descriptor.is_unsigned_integer);
+
+    EXPECT_EQ(remove_nullable(fields[2]->type)->get_primitive_type(), TYPE_DOUBLE);
+    EXPECT_EQ(fields[2]->type_descriptor.physical_type, ::parquet::Type::DOUBLE);
+    EXPECT_EQ(fields[2]->type_descriptor.extra_type_info, ParquetExtraTypeInfo::NONE);
+}
+
+TEST(ParquetSchemaTest, StructMakesDataTypeChildrenNullableAndPropagatesLevels) {
+    const auto fields = build_fields({::parquet::schema::GroupNode::Make(
+            "s", ::parquet::Repetition::OPTIONAL,
+            {
+                    ::parquet::schema::PrimitiveNode::Make("a", ::parquet::Repetition::REQUIRED,
+                                                           ::parquet::Type::INT32),
+                    ::parquet::schema::PrimitiveNode::Make("b", ::parquet::Repetition::OPTIONAL,
+                                                           ::parquet::Type::BYTE_ARRAY,
+                                                           ::parquet::ConvertedType::UTF8),
+            })});
+
+    ASSERT_EQ(fields.size(), 1);
+    const auto& struct_schema = *fields[0];
+    EXPECT_EQ(struct_schema.kind, ParquetColumnSchemaKind::STRUCT);
+    EXPECT_EQ(struct_schema.nullable_definition_level, 1); // OPTIONAL struct.
+    ASSERT_EQ(struct_schema.children.size(), 2);
+    EXPECT_EQ(struct_schema.children[0]->definition_level, 1); // REQUIRED child `a`.
+    EXPECT_EQ(struct_schema.children[1]->definition_level, 2); // OPTIONAL child `b`.
+    EXPECT_EQ(struct_schema.max_definition_level, 2);
+
+    const auto& struct_type =
+            assert_cast<const DataTypeStruct&>(*remove_nullable(struct_schema.type));
+    ASSERT_EQ(struct_type.get_elements().size(), 2);
+    EXPECT_TRUE(struct_type.get_elements()[0]->is_nullable()); // Even though `a` is REQUIRED.
+    EXPECT_TRUE(struct_type.get_elements()[1]->is_nullable());
+}
+
+TEST(ParquetSchemaTest, ListCompatibilityRulesAndLevels) {
+    const auto standard_list = ::parquet::schema::GroupNode::Make(
+            "xs", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::LIST);
+    const auto structural_array = ::parquet::schema::GroupNode::Make(
+            "ys", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "array", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "value", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64)})},
+            ::parquet::ConvertedType::LIST);
+
+    const auto fields = build_fields({standard_list, structural_array});
+    ASSERT_EQ(fields.size(), 2);
+
+    const auto& xs = *fields[0]; // Repeated group named "list".
+    EXPECT_EQ(xs.kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(xs.definition_level, 2);
+    EXPECT_EQ(xs.repetition_level, 1);
+    ASSERT_EQ(xs.children.size(), 1);
+    EXPECT_EQ(xs.children[0]->name, "element"); // Not the file's own name `item`.
+    EXPECT_EQ(xs.children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE);
+    EXPECT_TRUE(xs.children[0]->type->is_nullable());
+    const auto& xs_type = assert_cast<const DataTypeArray&>(*remove_nullable(xs.type));
+    EXPECT_TRUE(xs_type.get_nested_type()->is_nullable());
+
+    const auto& ys = *fields[1]; // Repeated group named "array".
+    EXPECT_EQ(ys.kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(ys.children.size(), 1);
+    EXPECT_EQ(ys.children[0]->kind, ParquetColumnSchemaKind::STRUCT); // Group kept as struct.
+    EXPECT_EQ(remove_nullable(ys.children[0]->type)->get_primitive_type(), TYPE_STRUCT);
+}
+
+TEST(ParquetSchemaTest, LegacyListElementResolutionRulesArePreserved) { // legacy LIST shapes
+    const auto two_level_list = ::parquet::schema::GroupNode::Make(
+            "two_level", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::REPEATED,
+                                                    ::parquet::Type::INT32)},
+            ::parquet::ConvertedType::LIST);
+    const auto tuple_list = ::parquet::schema::GroupNode::Make(
+            "tuple_list", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "tuple_list_tuple", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "value", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64)})},
+            ::parquet::ConvertedType::LIST);
+    const auto multi_field_list = ::parquet::schema::GroupNode::Make(
+            "records", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("id", ::parquet::Repetition::REQUIRED,
+                                                            ::parquet::Type::INT32),
+                     ::parquet::schema::PrimitiveNode::Make("name", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8)})},
+            ::parquet::ConvertedType::LIST);
+    const auto fields = build_fields({two_level_list, tuple_list, multi_field_list});
+    ASSERT_EQ(fields.size(), 3);
+    // Repeated primitive directly under LIST: the primitive itself becomes "element".
+    const auto& two_level = *fields[0];
+    EXPECT_EQ(two_level.kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(two_level.definition_level, 2);
+    EXPECT_EQ(two_level.repetition_level, 1);
+    ASSERT_EQ(two_level.children.size(), 1);
+    EXPECT_EQ(two_level.children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE);
+    EXPECT_EQ(two_level.children[0]->name, "element");
+    EXPECT_EQ(remove_nullable(two_level.children[0]->type)->get_primitive_type(), TYPE_INT);
+    // Repeated group named "<list name>_tuple": kept as a one-field STRUCT element.
+    const auto& tuple = *fields[1];
+    ASSERT_EQ(tuple.children.size(), 1);
+    EXPECT_EQ(tuple.children[0]->kind, ParquetColumnSchemaKind::STRUCT);
+    EXPECT_EQ(tuple.children[0]->name, "element");
+    ASSERT_EQ(tuple.children[0]->children.size(), 1);
+    EXPECT_EQ(tuple.children[0]->children[0]->name, "value");
+    // Repeated "list" group with two fields: kept as a STRUCT element with both fields in order.
+    const auto& multi_field = *fields[2];
+    ASSERT_EQ(multi_field.children.size(), 1);
+    EXPECT_EQ(multi_field.children[0]->kind, ParquetColumnSchemaKind::STRUCT);
+    ASSERT_EQ(multi_field.children[0]->children.size(), 2);
+    EXPECT_EQ(multi_field.children[0]->children[0]->name, "id");
+    EXPECT_EQ(multi_field.children[0]->children[1]->name, "name");
+}
+
+TEST(ParquetSchemaTest, NestedRepeatedInsideListElementIsWrappedOnce) { // repeated in "list"
+    const auto list_with_repeated_child = ::parquet::schema::GroupNode::Make(
+            "outer", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "items", ::parquet::Repetition::REPEATED, ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::LIST);
+    // Expected shape: outer LIST -> STRUCT element -> LIST "items" -> child named "element".
+    const auto fields = build_fields({list_with_repeated_child});
+    ASSERT_EQ(fields.size(), 1);
+    const auto& outer = *fields[0];
+    EXPECT_EQ(outer.kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(outer.children.size(), 1);
+    const auto& element = *outer.children[0];
+    EXPECT_EQ(element.kind, ParquetColumnSchemaKind::STRUCT);
+    ASSERT_EQ(element.children.size(), 1);
+    EXPECT_EQ(element.children[0]->kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(element.children[0]->name, "items");
+    ASSERT_EQ(element.children[0]->children.size(), 1);
+    EXPECT_EQ(element.children[0]->children[0]->name, "element");
+}
+
+TEST(ParquetSchemaTest, ListWrapperWithLogicalAnnotationIsPreservedAsElement) {
+    const auto annotated_repeated_group = ::parquet::schema::GroupNode::Make(
+            "xs", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)},
+                    ::parquet::ConvertedType::LIST)},
+            ::parquet::ConvertedType::LIST);
+    // A LIST-annotated repeated group whose only child is an optional primitive is rejected.
+    EXPECT_FALSE(build_status({annotated_repeated_group}).ok());
+    // Same annotated wrapper, but its child is another repeated group: this one is accepted.
+    const auto nested_list_wrapper = ::parquet::schema::GroupNode::Make(
+            "xs", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::GroupNode::Make(
+                            "list", ::parquet::Repetition::REPEATED,
+                            {::parquet::schema::PrimitiveNode::Make("value",
+                                                                    ::parquet::Repetition::OPTIONAL,
+                                                                    ::parquet::Type::INT32)})},
+                    ::parquet::ConvertedType::LIST)},
+            ::parquet::ConvertedType::LIST);
+    // The outer LIST's element is itself a LIST named "element" whose own element is an INT.
+    const auto fields = build_fields({nested_list_wrapper});
+    ASSERT_EQ(fields.size(), 1);
+    const auto& xs = *fields[0];
+    EXPECT_EQ(xs.kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(xs.children.size(), 1);
+    const auto& element = *xs.children[0];
+    EXPECT_EQ(element.kind, ParquetColumnSchemaKind::LIST);
+    EXPECT_EQ(element.name, "element");
+    ASSERT_EQ(element.children.size(), 1);
+    EXPECT_EQ(element.children[0]->name, "element");
+    EXPECT_EQ(remove_nullable(element.children[0]->type)->get_primitive_type(), TYPE_INT);
+}
+
+TEST(ParquetSchemaTest, MapWrapperIsFoldedAndOptionalKeyIsAllowed) { // MAP with OPTIONAL key
+    const auto fields = build_fields({::parquet::schema::GroupNode::Make(
+            "m", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::REPEATED,
+                    {
+                            ::parquet::schema::PrimitiveNode::Make(
+                                    "key", ::parquet::Repetition::OPTIONAL,
+                                    ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8),
+                            ::parquet::schema::PrimitiveNode::Make("value",
+                                                                   ::parquet::Repetition::OPTIONAL,
+                                                                   ::parquet::Type::INT32),
+                    })},
+            ::parquet::ConvertedType::MAP)});
+    // The repeated key_value group is folded away: "key" and "value" are the MAP's children.
+    ASSERT_EQ(fields.size(), 1);
+    const auto& map_schema = *fields[0];
+    EXPECT_EQ(map_schema.kind, ParquetColumnSchemaKind::MAP);
+    EXPECT_EQ(map_schema.definition_level, 2);
+    EXPECT_EQ(map_schema.repetition_level, 1);
+    ASSERT_EQ(map_schema.children.size(), 2);
+    EXPECT_EQ(map_schema.children[0]->name, "key");
+    EXPECT_EQ(map_schema.children[1]->name, "value");
+    EXPECT_TRUE(map_schema.children[0]->type->is_nullable());
+    // Both the key and the value type of the resulting map type are expected to be nullable.
+    const auto& map_type = assert_cast<const DataTypeMap&>(*remove_nullable(map_schema.type));
+    EXPECT_TRUE(map_type.get_key_type()->is_nullable());
+    EXPECT_TRUE(map_type.get_value_type()->is_nullable());
+}
+
+TEST(ParquetSchemaTest, StandardMapLevelsAndDataTypesAreBuiltFromEntryContext) {
+    const auto fields = build_fields({::parquet::schema::GroupNode::Make(
+            "m", ::parquet::Repetition::REQUIRED,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::REPEATED,
+                    {
+                            ::parquet::schema::PrimitiveNode::Make(
+                                    "key", ::parquet::Repetition::REQUIRED,
+                                    ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8),
+                            ::parquet::schema::PrimitiveNode::Make("value",
+                                                                   ::parquet::Repetition::OPTIONAL,
+                                                                   ::parquet::Type::INT32),
+                    })},
+            ::parquet::ConvertedType::MAP)});
+    // REQUIRED map, REQUIRED key, OPTIONAL value: check type nullability and every level field.
+    ASSERT_EQ(fields.size(), 1);
+    const auto& map_schema = *fields[0];
+    EXPECT_FALSE(map_schema.type->is_nullable());
+    EXPECT_EQ(map_schema.definition_level, 1);
+    EXPECT_EQ(map_schema.repetition_level, 1);
+    EXPECT_EQ(map_schema.repeated_repetition_level, 1);
+    EXPECT_EQ(map_schema.max_definition_level, 2);
+    EXPECT_EQ(map_schema.max_repetition_level, 1);
+    ASSERT_EQ(map_schema.children.size(), 2);
+    EXPECT_EQ(map_schema.children[0]->definition_level, 1);
+    EXPECT_EQ(map_schema.children[0]->repetition_level, 1);
+    EXPECT_EQ(map_schema.children[1]->definition_level, 2);
+    EXPECT_EQ(map_schema.children[1]->nullable_definition_level, 2);
+    // The key type is expected to be nullable even though the Parquet key is REQUIRED.
+    const auto& map_type = assert_cast<const DataTypeMap&>(*remove_nullable(map_schema.type));
+    EXPECT_TRUE(map_type.get_key_type()->is_nullable());
+    EXPECT_TRUE(map_type.get_value_type()->is_nullable());
+}
+
+TEST(ParquetSchemaTest, BareRepeatedFieldsAreWrappedAsLists) { // no LIST annotation at all
+    const auto fields = build_fields({
+            ::parquet::schema::PrimitiveNode::Make("items", ::parquet::Repetition::REPEATED,
+                                                   ::parquet::Type::INT32),
+            ::parquet::schema::GroupNode::Make(
+                    "links", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("url", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8),
+                     ::parquet::schema::PrimitiveNode::Make("rank", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::INT32)}),
+    });
+    // Bare repeated primitive -> LIST with a PRIMITIVE child named "element".
+    ASSERT_EQ(fields.size(), 2);
+    EXPECT_EQ(fields[0]->kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(fields[0]->children.size(), 1);
+    EXPECT_EQ(fields[0]->children[0]->kind, ParquetColumnSchemaKind::PRIMITIVE);
+    EXPECT_EQ(fields[0]->children[0]->name, "element");
+    // Bare repeated group -> LIST with a STRUCT child named "element".
+    EXPECT_EQ(fields[1]->kind, ParquetColumnSchemaKind::LIST);
+    ASSERT_EQ(fields[1]->children.size(), 1);
+    EXPECT_EQ(fields[1]->children[0]->kind, ParquetColumnSchemaKind::STRUCT);
+    EXPECT_EQ(fields[1]->children[0]->name, "element");
+}
+
+TEST(ParquetSchemaTest, DeepLevelChainPropagatesDefinitionAndRepetitionLevels) {
+    const auto fields = build_fields({::parquet::schema::GroupNode::Make(
+            "s", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "inner", ::parquet::Repetition::OPTIONAL,
+                    {::parquet::schema::PrimitiveNode::Make(
+                            "items", ::parquet::Repetition::REPEATED, ::parquet::Type::INT32)})})});
+    // s (optional) -> inner (optional) -> items (repeated): definition levels 1, 2, 3 in turn.
+    ASSERT_EQ(fields.size(), 1);
+    const auto& s = *fields[0];
+    EXPECT_EQ(s.definition_level, 1);
+    EXPECT_EQ(s.nullable_definition_level, 1);
+    ASSERT_EQ(s.children.size(), 1);
+    const auto& inner = *s.children[0];
+    EXPECT_EQ(inner.definition_level, 2);
+    EXPECT_EQ(inner.nullable_definition_level, 2);
+    ASSERT_EQ(inner.children.size(), 1);
+    const auto& items = *inner.children[0];
+    EXPECT_EQ(items.kind, ParquetColumnSchemaKind::LIST); // bare repeated leaf becomes a LIST
+    EXPECT_EQ(items.definition_level, 3);
+    EXPECT_EQ(items.repetition_level, 1);
+    EXPECT_EQ(items.repeated_ancestor_definition_level, 3);
+    EXPECT_EQ(items.repeated_repetition_level, 1);
+    EXPECT_EQ(items.max_definition_level, 3);
+    EXPECT_EQ(items.max_repetition_level, 1);
+    ASSERT_EQ(items.children.size(), 1);
+    EXPECT_EQ(items.children[0]->definition_level, 3); // element shares the list's levels
+    EXPECT_EQ(items.children[0]->repetition_level, 1);
+}
+
+TEST(ParquetSchemaTest, BuildEntryValidatesNullPointerAndEmptyRoot) { // entry-point checks
+    auto empty_root = ::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED,
+                                                         ::parquet::schema::NodeVector {});
+    ::parquet::SchemaDescriptor descriptor;
+    descriptor.Init(empty_root);
+    // A null output pointer must come back as a non-OK status.
+    EXPECT_FALSE(build_parquet_column_schema(descriptor, nullptr).ok());
+    // A root group without children is accepted and yields no fields.
+    std::vector<std::unique_ptr<ParquetColumnSchema>> fields;
+    ASSERT_TRUE(build_parquet_column_schema(descriptor, &fields).ok());
+    EXPECT_TRUE(fields.empty());
+}
+
+TEST(ParquetSchemaTest, RejectInvalidListMapAndUnsupportedTime) { // three rejected schemas
+    const auto bad_list = ::parquet::schema::GroupNode::Make(
+            "bad_list", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL,
+                                                    ::parquet::Type::INT32)},
+            ::parquet::ConvertedType::LIST);
+    EXPECT_FALSE(build_status({bad_list}).ok()); // LIST child is OPTIONAL, not REPEATED
+    // MAP whose only child is a repeated primitive rather than a repeated group.
+    const auto bad_map = ::parquet::schema::GroupNode::Make(
+            "bad_map", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::PrimitiveNode::Make("entry", ::parquet::Repetition::REPEATED,
+                                                    ::parquet::Type::INT32)},
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({bad_map}).ok());
+    // ConvertedType TIME_MILLIS on INT32 must fail with the isAdjustedToUTC=true message.
+    const auto converted_time = ::parquet::schema::PrimitiveNode::Make(
+            "time_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32,
+            ::parquet::ConvertedType::TIME_MILLIS);
+    const auto status = build_status({converted_time});
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(status.to_string().find("Parquet TIME with isAdjustedToUTC=true is not supported"),
+              std::string::npos);
+}
+
+TEST(ParquetSchemaTest, RejectAdditionalInvalidListAndMapLayouts) { // six rejected layouts
+    const auto zero_child_list = ::parquet::schema::GroupNode::Make(
+            "zero_child_list", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make("list", ::parquet::Repetition::REPEATED,
+                                                ::parquet::schema::NodeVector {})},
+            ::parquet::ConvertedType::LIST);
+    EXPECT_FALSE(build_status({zero_child_list}).ok()); // repeated "list" group is empty
+    // LIST-annotated group that is itself REPEATED.
+    const auto repeated_list = ::parquet::schema::GroupNode::Make(
+            "repeated_list", ::parquet::Repetition::REPEATED,
+            {::parquet::schema::GroupNode::Make(
+                    "list", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("item", ::parquet::Repetition::OPTIONAL,
+                                                            ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::LIST);
+    EXPECT_FALSE(build_status({repeated_list}).ok());
+    // MAP-annotated group with two repeated entry groups.
+    const auto map_with_two_fields = ::parquet::schema::GroupNode::Make(
+            "bad_map", ::parquet::Repetition::OPTIONAL,
+            {
+                    ::parquet::schema::GroupNode::Make(
+                            "entry1", ::parquet::Repetition::REPEATED,
+                            {::parquet::schema::PrimitiveNode::Make(
+                                     "key", ::parquet::Repetition::REQUIRED,
+                                     ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8),
+                             ::parquet::schema::PrimitiveNode::Make("value",
+                                                                    ::parquet::Repetition::OPTIONAL,
+                                                                    ::parquet::Type::INT32)}),
+                    ::parquet::schema::GroupNode::Make(
+                            "entry2", ::parquet::Repetition::REPEATED,
+                            {::parquet::schema::PrimitiveNode::Make(
+                                     "key", ::parquet::Repetition::REQUIRED,
+                                     ::parquet::Type::BYTE_ARRAY, ::parquet::ConvertedType::UTF8),
+                             ::parquet::schema::PrimitiveNode::Make("value",
+                                                                    ::parquet::Repetition::OPTIONAL,
+                                                                    ::parquet::Type::INT32)}),
+            },
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({map_with_two_fields}).ok());
+    // MAP whose key_value group is OPTIONAL rather than REPEATED.
+    const auto non_repeated_map_entry = ::parquet::schema::GroupNode::Make(
+            "bad_map", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::OPTIONAL,
+                    {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8),
+                     ::parquet::schema::PrimitiveNode::Make(
+                             "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({non_repeated_map_entry}).ok());
+    // MAP whose key_value group holds only a key.
+    const auto map_entry_with_one_child = ::parquet::schema::GroupNode::Make(
+            "bad_map", ::parquet::Repetition::OPTIONAL,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8)})},
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({map_entry_with_one_child}).ok());
+    // MAP-annotated group that is itself REPEATED.
+    const auto repeated_map = ::parquet::schema::GroupNode::Make(
+            "repeated_map", ::parquet::Repetition::REPEATED,
+            {::parquet::schema::GroupNode::Make(
+                    "key_value", ::parquet::Repetition::REPEATED,
+                    {::parquet::schema::PrimitiveNode::Make("key", ::parquet::Repetition::REQUIRED,
+                                                            ::parquet::Type::BYTE_ARRAY,
+                                                            ::parquet::ConvertedType::UTF8),
+                     ::parquet::schema::PrimitiveNode::Make(
+                             "value", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT32)})},
+            ::parquet::ConvertedType::MAP);
+    EXPECT_FALSE(build_status({repeated_map}).ok());
+}
+
+TEST(ParquetSchemaTest, LogicalUtcTimeIsRejected) { // Time(true, MILLIS) on INT32 must fail
+    const auto adjusted_time = ::parquet::schema::PrimitiveNode::Make(
+            "time_ms", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::MILLIS),
+            ::parquet::Type::INT32);
+    const auto status = build_status({adjusted_time});
+    EXPECT_FALSE(status.ok()); // and the error text must name the unsupported UTC-adjusted TIME
+    EXPECT_NE(status.to_string().find("Parquet TIME with isAdjustedToUTC=true is not supported"),
+              std::string::npos);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_serde_reader_test.cpp b/be/test/format_v2/parquet/parquet_serde_reader_test.cpp
new file mode 100644
index 00000000000000..c35138e3263723
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_serde_reader_test.cpp
@@ -0,0 +1,459 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <cmath>
+#include <filesystem>
+#include <functional>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/column/column_decimal.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/types.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "format_v2/parquet/reader/column_reader.h"
+
+namespace doris::format::parquet {
+namespace {
+
+constexpr int64_t ROW_COUNT = 5; // values per test column; also the WriteTable chunk size
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> array; // receives the result of builder->Finish()
+    EXPECT_TRUE(builder->Finish(&array).ok()); // non-fatal: a failed Finish is only recorded
+    return array;
+}
+
+class ParquetSerdeReaderTest : public testing::Test {
+protected:
+    void SetUp() override {
+        _test_dir = std::filesystem::temp_directory_path() / "doris_parquet_serde_reader_test";
+        std::filesystem::remove_all(_test_dir);
+        std::filesystem::create_directories(_test_dir);
+        _file_path = (_test_dir / "serde.parquet").string();
+        write_parquet_file();
+        open_file(_file_path);
+    }
+
+    void TearDown() override { std::filesystem::remove_all(_test_dir); }
+
+    template <typename Builder, typename Value>
+    std::shared_ptr<arrow::Array> build_required_array(const std::vector<Value>& values) {
+        Builder builder;
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(value).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_int32_array() {
+        arrow::Int32Builder builder;
+        EXPECT_TRUE(builder.Append(1).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append(3).ok());
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append(5).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_nullable_float16_array() {
+        arrow::HalfFloatBuilder builder;
+        EXPECT_TRUE(builder.AppendNull().ok());
+        EXPECT_TRUE(builder.Append(0x0000).ok());
+        EXPECT_TRUE(builder.Append(0x8000).ok());
+        EXPECT_TRUE(builder.Append(0x3E00).ok());
+        EXPECT_TRUE(builder.Append(0x7E00).ok());
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_binary_array(const std::vector<std::string>& values) {
+        arrow::BinaryBuilder builder;
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(reinterpret_cast<const uint8_t*>(value.data()),
+                                       static_cast<int32_t>(value.size()))
+                                .ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+        arrow::StringBuilder builder;
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(value).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_fixed_binary_array(
+            const std::shared_ptr<arrow::DataType>& type, const std::vector<std::string>& values) {
+        arrow::FixedSizeBinaryBuilder builder(type, arrow::default_memory_pool());
+        for (const auto& value : values) {
+            EXPECT_TRUE(builder.Append(reinterpret_cast<const uint8_t*>(value.data())).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_timestamp_array(
+            const std::shared_ptr<arrow::DataType>& type, const std::vector<int64_t>& values) {
+        arrow::TimestampBuilder builder(type, arrow::default_memory_pool());
+        for (const auto value : values) {
+            EXPECT_TRUE(builder.Append(value).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    std::shared_ptr<arrow::Array> build_decimal_array(const std::shared_ptr<arrow::DataType>& type,
+                                                      const std::vector<int64_t>& values) {
+        arrow::Decimal128Builder builder(type, arrow::default_memory_pool());
+        for (const auto value : values) {
+            EXPECT_TRUE(builder.Append(arrow::Decimal128(value)).ok());
+        }
+        return finish_array(&builder);
+    }
+
+    void add_field(const std::shared_ptr<arrow::Field>& field,
+                   std::shared_ptr<arrow::Array> array) {
+        _arrow_fields.push_back(field);
+        _arrays.push_back(std::move(array));
+    }
+
+    void write_table(const std::string& file_path, const std::shared_ptr<arrow::Table>& table,
+                     std::shared_ptr<::parquet::ArrowWriterProperties> arrow_properties = nullptr) {
+        auto file_result = arrow::io::FileOutputStream::Open(file_path);
+        ASSERT_TRUE(file_result.ok()) << file_result.status();
+        ::parquet::WriterProperties::Builder writer_builder;
+        writer_builder.version(::parquet::ParquetVersion::PARQUET_2_6);
+        writer_builder.data_page_version(::parquet::ParquetDataPageVersion::V2);
+        writer_builder.compression(::parquet::Compression::UNCOMPRESSED);
+        if (arrow_properties == nullptr) {
+            ::parquet::ArrowWriterProperties::Builder arrow_builder;
+            arrow_properties = arrow_builder.build();
+        }
+        PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(
+                *table, arrow::default_memory_pool(), *file_result, ROW_COUNT,
+                writer_builder.build(), std::move(arrow_properties)));
+    }
+
+    void write_parquet_file() {
+        add_field(arrow::field("bool_col", arrow::boolean(), false),
+                  build_required_array<arrow::BooleanBuilder, bool>(
+                          {true, false, true, false, true}));
+        add_field(arrow::field("int32_col", arrow::int32(), false),
+                  build_required_array<arrow::Int32Builder, int32_t>({10, 20, 30, 40, 50}));
+        add_field(arrow::field("int64_col", arrow::int64(), false),
+                  build_required_array<arrow::Int64Builder, int64_t>(
+                          {10000000000L, -9L, 42L, 77L, 123L}));
+        add_field(arrow::field("uint32_col", arrow::uint32(), false),
+                  build_required_array<arrow::UInt32Builder, uint32_t>(
+                          {0U, 1U, 1U << 31, std::numeric_limits<uint32_t>::max(), 42U}));
+        add_field(arrow::field("uint64_col", arrow::uint64(), false),
+                  build_required_array<arrow::UInt64Builder, uint64_t>(
+                          {0ULL, 1ULL, 1ULL << 63, std::numeric_limits<uint64_t>::max(), 42ULL}));
+        add_field(arrow::field("float_col", arrow::float32(), false),
+                  build_required_array<arrow::FloatBuilder, float>(
+                          {1.5F, -2.25F, 3.0F, 4.5F, 5.75F}));
+        add_field(arrow::field("double_col", arrow::float64(), false),
+                  build_required_array<arrow::DoubleBuilder, double>({3.5, -4.75, 6.0, 7.25, 8.5}));
+        add_field(arrow::field("nullable_float16_col", arrow::float16(), true),
+                  build_nullable_float16_array());
+        add_field(arrow::field("binary_col", arrow::binary(), false),
+                  build_binary_array({"bin_a", "bin_b", "bin_c", "bin_d", "bin_e"}));
+        add_field(arrow::field("string_col", arrow::utf8(), false),
+                  build_string_array({"alpha", "beta", "gamma", "delta", "epsilon"}));
+        add_field(arrow::field("fixed_binary_col", arrow::fixed_size_binary(4), false),
+                  build_fixed_binary_array(arrow::fixed_size_binary(4),
+                                           {"aaaa", "bbbb", "cccc", "dddd", "eeee"}));
+        add_field(arrow::field("date_col", arrow::date32(), false),
+                  build_required_array<arrow::Date32Builder, int32_t>({0, 1, 18628, 18629, 18630}));
+        add_field(arrow::field("timestamp_millis_col", arrow::timestamp(arrow::TimeUnit::MILLI),
+                               false),
+                  build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MILLI),
+                                        {0, 1234, 1609459200000, 1609459201000, -1}));
+        add_field(arrow::field("timestamp_micros_col", arrow::timestamp(arrow::TimeUnit::MICRO),
+                               false),
+                  build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO),
+                                        {0, 1234567, 1609459200000000, 1609459201000000, -1}));
+        add_field(arrow::field("timestamp_micros_utc_col",
+                               arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), false),
+                  build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"),
+                                        {0, 1234567, 1609459200000000, 1609459201000000, -1}));
+        add_field(arrow::field("decimal_fixed_binary_9_2_col", arrow::decimal128(9, 2), false),
+                  build_decimal_array(arrow::decimal128(9, 2), {12345, -67, 0, 987, 1000}));
+        add_field(arrow::field("decimal_fixed_binary_18_6_col", arrow::decimal128(18, 6), false),
+                  build_decimal_array(arrow::decimal128(18, 6),
+                                      {1234567, -670000, 0, 9870000, 1000000}));
+        add_field(arrow::field("nullable_int_col", arrow::int32(), true),
+                  build_nullable_int32_array());
+
+        write_table(_file_path, arrow::Table::Make(arrow::schema(_arrow_fields), _arrays));
+    }
+
+    void open_file(const std::string& file_path) { // sets _file_reader, _row_group and _fields
+        _file_reader = ::parquet::ParquetFileReader::OpenFile(file_path, /*memory_map=*/false);
+        ASSERT_NE(_file_reader, nullptr);
+        ASSERT_EQ(_file_reader->metadata()->num_row_groups(), 1);
+        _row_group = _file_reader->RowGroup(0);
+        ASSERT_NE(_row_group, nullptr);
+        const auto* descriptor = _file_reader->metadata()->schema();
+        ASSERT_NE(descriptor, nullptr);
+        const auto status = build_parquet_column_schema(*descriptor, &_fields);
+        ASSERT_TRUE(status.ok()) << status;
+    }
+
+    // Position of `name` in _fields; on a miss records a failure and returns _fields.size().
+    size_t find_field_idx(const std::string& name) const {
+        size_t pos = 0;
+        for (; pos < _fields.size() && !(_fields[pos]->name == name); ++pos) {}
+        if (pos == _fields.size()) {
+            ADD_FAILURE() << "Cannot find parquet serde test field " << name;
+        }
+        return pos;
+    }
+
+    std::unique_ptr<ParquetColumnReader> create_reader(size_t field_idx) const {
+        std::unique_ptr<ParquetColumnReader> column_reader; // returned even if create() fails
+        ParquetColumnReaderFactory factory(_row_group, _file_reader->metadata()->num_columns());
+        const auto status = factory.create(*_fields[field_idx], &column_reader);
+        EXPECT_TRUE(status.ok()) << status;
+        return column_reader;
+    }
+
+    template <typename Validator>
+    void read_and_validate(const std::string& name, Validator validator) const {
+        const size_t idx = find_field_idx(name); // NOTE(review): _fields.size() if name is missing
+        ASSERT_TRUE(supports_record_reader(_fields[idx]->type_descriptor));
+        auto column_reader = create_reader(idx);
+        ASSERT_NE(column_reader, nullptr);
+        MutableColumnPtr output = column_reader->type()->create_column();
+        int64_t read_count = 0;
+        const auto status = column_reader->read(ROW_COUNT, output, &read_count);
+        ASSERT_TRUE(status.ok()) << status;
+        ASSERT_EQ(read_count, ROW_COUNT);
+        ASSERT_EQ(output->size(), ROW_COUNT);
+        validator(*_fields[idx], *output);
+    }
+
+    std::filesystem::path _test_dir;
+    std::string _file_path;
+    std::unique_ptr<::parquet::ParquetFileReader> _file_reader;
+    std::shared_ptr<::parquet::RowGroupReader> _row_group;
+    std::vector<std::unique_ptr<ParquetColumnSchema>> _fields;
+    std::vector<std::shared_ptr<arrow::Field>> _arrow_fields;
+    std::vector<std::shared_ptr<arrow::Array>> _arrays;
+};
+
+TEST_F(ParquetSerdeReaderTest, ReadAllSupportedPhysicalAndLogicalTypes) { // one lambda per column
+    read_and_validate("bool_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::BOOLEAN);
+        const auto& values = assert_cast<const ColumnBool&>(column);
+        EXPECT_EQ(values.get_element(0), 1);
+        EXPECT_EQ(values.get_element(1), 0);
+        EXPECT_EQ(values.get_element(4), 1);
+    });
+    read_and_validate("int32_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32);
+        const auto& values = assert_cast<const ColumnInt32&>(column);
+        EXPECT_EQ(values.get_element(0), 10);
+        EXPECT_EQ(values.get_element(4), 50);
+    });
+    read_and_validate("int64_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+        const auto& values = assert_cast<const ColumnInt64&>(column);
+        EXPECT_EQ(values.get_element(0), 10000000000L);
+        EXPECT_EQ(values.get_element(1), -9L);
+    });
+    read_and_validate("uint32_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32);
+        EXPECT_TRUE(schema.type_descriptor.is_unsigned_integer);
+        EXPECT_EQ(schema.type_descriptor.integer_bit_width, 32);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_BIGINT);
+        const auto& values = assert_cast<const ColumnInt64&>(column); // UINT32 read as 64-bit
+        EXPECT_EQ(values.get_element(2), 2147483648L); // 2^31, above INT32_MAX
+        EXPECT_EQ(values.get_element(3),
+                  static_cast<int64_t>(std::numeric_limits<uint32_t>::max()));
+    });
+    read_and_validate("uint64_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+        EXPECT_TRUE(schema.type_descriptor.is_unsigned_integer);
+        EXPECT_EQ(schema.type_descriptor.integer_bit_width, 64);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_LARGEINT);
+        const auto& values = assert_cast<const ColumnInt128&>(column); // UINT64 read as int128
+        EXPECT_EQ(values.get_element(2), static_cast<int128_t>(1) << 63); // 2^63
+        EXPECT_EQ(values.get_element(3),
+                  static_cast<int128_t>(std::numeric_limits<uint64_t>::max()));
+    });
+    read_and_validate("float_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FLOAT);
+        const auto& values = assert_cast<const ColumnFloat32&>(column);
+        EXPECT_FLOAT_EQ(values.get_element(0), 1.5F);
+        EXPECT_FLOAT_EQ(values.get_element(1), -2.25F);
+    });
+    read_and_validate("double_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::DOUBLE);
+        const auto& values = assert_cast<const ColumnFloat64&>(column);
+        EXPECT_DOUBLE_EQ(values.get_element(0), 3.5);
+        EXPECT_DOUBLE_EQ(values.get_element(1), -4.75);
+    });
+    read_and_validate("nullable_float16_col", [](const ParquetColumnSchema& schema,
+                                                 const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+        EXPECT_EQ(schema.type_descriptor.fixed_length, 2);
+        EXPECT_EQ(schema.type_descriptor.extra_type_info, ParquetExtraTypeInfo::FLOAT16);
+        EXPECT_FALSE(schema.type_descriptor.is_string_like);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_FLOAT);
+        const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+        const auto& values = assert_cast<const ColumnFloat32&>(nullable_column.get_nested_column());
+        ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+        EXPECT_TRUE(nullable_column.is_null_at(0));
+        EXPECT_FLOAT_EQ(values.get_element(1), 0.0F);
+        EXPECT_FALSE(std::signbit(values.get_element(1))); // positive zero
+        EXPECT_FLOAT_EQ(values.get_element(2), -0.0F);
+        EXPECT_TRUE(std::signbit(values.get_element(2))); // negative zero keeps its sign
+        EXPECT_FLOAT_EQ(values.get_element(3), 1.5F);
+        EXPECT_TRUE(std::isnan(values.get_element(4)));
+    });
+    read_and_validate("binary_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::BYTE_ARRAY);
+        const auto& values = assert_cast<const ColumnString&>(column);
+        EXPECT_EQ(values.get_data_at(0).to_string(), "bin_a");
+        EXPECT_EQ(values.get_data_at(3).to_string(), "bin_d");
+    });
+    read_and_validate("string_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_TRUE(schema.type_descriptor.is_string_like);
+        const auto& values = assert_cast<const ColumnString&>(column);
+        EXPECT_EQ(values.get_data_at(0).to_string(), "alpha");
+        EXPECT_EQ(values.get_data_at(4).to_string(), "epsilon");
+    });
+    read_and_validate("fixed_binary_col", [](const ParquetColumnSchema& schema,
+                                             const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+        EXPECT_EQ(schema.type_descriptor.fixed_length, 4);
+        const auto& values = assert_cast<const ColumnString&>(column);
+        EXPECT_EQ(values.get_data_at(0).to_string(), "aaaa");
+        EXPECT_EQ(values.get_data_at(2).to_string(), "cccc");
+    });
+    read_and_validate("date_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT32);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATEV2);
+        EXPECT_EQ(schema.type->to_string(column, 0), "1970-01-01");
+        EXPECT_EQ(schema.type->to_string(column, 2), "2021-01-01");
+    });
+    read_and_validate(
+            "timestamp_millis_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+                EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+                EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2);
+                EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234");
+                EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999"); // -1 ms
+            });
+    read_and_validate(
+            "timestamp_micros_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+                EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+                EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2);
+                EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234567");
+                EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999999");
+            });
+    read_and_validate("timestamp_micros_utc_col", [](const ParquetColumnSchema& schema,
+                                                     const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::INT64);
+        EXPECT_TRUE(schema.type_descriptor.timestamp_is_adjusted_to_utc);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DATETIMEV2);
+        EXPECT_EQ(schema.type->to_string(column, 1), "1970-01-01 00:00:01.234567");
+        EXPECT_EQ(schema.type->to_string(column, 4), "1969-12-31 23:59:59.999999"); // -1 us
+    });
+    read_and_validate("decimal_fixed_binary_9_2_col", [](const ParquetColumnSchema& schema,
+                                                         const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+        EXPECT_TRUE(schema.type_descriptor.is_decimal);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DECIMAL32);
+        const auto& values = assert_cast<const ColumnDecimal32&>(column);
+        EXPECT_EQ(values.get_element(0), Decimal32(12345)); // unscaled value of 123.45
+        EXPECT_EQ(schema.type->to_string(column, 0), "123.45");
+    });
+    read_and_validate("decimal_fixed_binary_18_6_col", [](const ParquetColumnSchema& schema,
+                                                          const IColumn& column) {
+        EXPECT_EQ(schema.type_descriptor.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+        EXPECT_TRUE(schema.type_descriptor.is_decimal);
+        EXPECT_EQ(remove_nullable(schema.type)->get_primitive_type(), TYPE_DECIMAL64);
+        const auto& values = assert_cast<const ColumnDecimal64&>(column);
+        EXPECT_EQ(values.get_element(0), Decimal64(1234567)); // unscaled value of 1.234567
+        EXPECT_EQ(schema.type->to_string(column, 0), "1.234567");
+    });
+    read_and_validate(
+            "nullable_int_col", [](const ParquetColumnSchema& schema, const IColumn& column) {
+                EXPECT_TRUE(schema.type->is_nullable());
+                const auto& nullable_column = assert_cast<const ColumnNullable&>(column);
+                const auto& nested_column =
+                        assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+                ASSERT_EQ(nullable_column.size(), ROW_COUNT);
+                EXPECT_FALSE(nullable_column.is_null_at(0));
+                EXPECT_TRUE(nullable_column.is_null_at(1));
+                EXPECT_FALSE(nullable_column.is_null_at(2));
+                EXPECT_TRUE(nullable_column.is_null_at(3));
+                EXPECT_EQ(nested_column.get_element(0), 1);
+                EXPECT_EQ(nested_column.get_element(2), 3);
+            });
+}
+
+TEST_F(ParquetSerdeReaderTest, ReadInt96TimestampAsDateTimeV2) {
+    const auto int96_path = (_test_dir / "int96_timestamp.parquet").string();
+    auto ts_field = arrow::field("col_datetime", arrow::timestamp(arrow::TimeUnit::MICRO), false);
+    auto ts_array = build_timestamp_array(arrow::timestamp(arrow::TimeUnit::MICRO),
+                                          {0, 1234567, 1609459200000000, 1609459201000000, -1});
+    auto int96_table = arrow::Table::Make(arrow::schema({ts_field}), {ts_array});
+    // Drop any reader state the fixture already holds, then write with forced INT96 timestamps.
+    _fields.clear();
+    _file_reader.reset();
+    _row_group.reset();
+    ::parquet::ArrowWriterProperties::Builder writer_props;
+    writer_props.enable_force_write_int96_timestamps();
+    write_table(int96_path, int96_table, writer_props.build());
+    open_file(int96_path);
+    // The only column must surface as INT96 / IMPALA_TIMESTAMP mapped to DATETIMEV2.
+    ASSERT_EQ(_fields.size(), 1);
+    const auto& int96_field = *_fields[0];
+    EXPECT_EQ(int96_field.type_descriptor.physical_type, ::parquet::Type::INT96);
+    EXPECT_EQ(int96_field.type_descriptor.extra_type_info, ParquetExtraTypeInfo::IMPALA_TIMESTAMP);
+    ASSERT_TRUE(supports_record_reader(int96_field.type_descriptor));
+    ASSERT_EQ(remove_nullable(int96_field.type)->get_primitive_type(), TYPE_DATETIMEV2);
+    auto column_reader = create_reader(0);
+    ASSERT_NE(column_reader, nullptr);
+    auto output = int96_field.type->create_column();
+    int64_t read_count = 0;
+    ASSERT_TRUE(column_reader->read(ROW_COUNT, output, &read_count).ok());
+    ASSERT_EQ(read_count, ROW_COUNT);
+    EXPECT_EQ(int96_field.type->to_string(*output, 0), "1970-01-01 00:00:00.000000");
+    EXPECT_EQ(int96_field.type->to_string(*output, 1), "1970-01-01 00:00:01.234567");
+    EXPECT_EQ(int96_field.type->to_string(*output, 2), "2021-01-01 00:00:00.000000");
+    EXPECT_EQ(int96_field.type->to_string(*output, 4), "1969-12-31 23:59:59.999999");
+}
+
+} // namespace
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/parquet/parquet_statistics_test.cpp b/be/test/format_v2/parquet/parquet_statistics_test.cpp
new file mode 100644
index 00000000000000..f2ae2448013d26
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_statistics_test.cpp
@@ -0,0 +1,460 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_statistics.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <cctz/time_zone.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+#include <parquet/bloom_filter.h>
+
+#include <memory>
+#include <numeric>
+#include <optional>
+#include <string>
+#include <vector>
+
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "format_v2/file_reader.h"
+#include "format_v2/parquet/parquet_column_schema.h"
+#include "storage/predicate/accept_null_predicate.h"
+#include "storage/predicate/null_predicate.h"
+#include "storage/predicate/predicate_creator.h"
+
+namespace doris {
+namespace {
+
+format::parquet::ParquetColumnSchema primitive_bloom_schema(const DataTypePtr& type) {
+    format::parquet::ParquetColumnSchema leaf; // PRIMITIVE column "c0" with ids 0
+    leaf.kind = format::parquet::ParquetColumnSchemaKind::PRIMITIVE;
+    leaf.name = "c0";
+    leaf.type = type;
+    leaf.local_id = 0;
+    leaf.leaf_column_id = 0;
+    return leaf;
+}
+
+format::FileColumnPredicateFilter bloom_filter_with_predicate(
+        const std::shared_ptr<ColumnPredicate>& predicate) {
+    format::FileColumnPredicateFilter column_filter; // targets file column 0
+    column_filter.file_column_id = format::LocalColumnId(0);
+    column_filter.target = format::FileNestedPredicateTarget(column_filter.file_column_id);
+    column_filter.predicates.emplace_back(predicate);
+    return column_filter;
+}
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> finished; // returned even if Finish() reports an error
+    EXPECT_TRUE(builder->Finish(&finished).ok());
+    return finished;
+}
+
+std::shared_ptr<arrow::Array> int32_array(const std::vector<std::optional<int32_t>>& values) {
+    // Builds an Int32 array in input order; std::nullopt entries become null slots.
+    arrow::Int32Builder int_builder;
+    for (const auto& maybe_value : values) {
+        const arrow::Status status = maybe_value.has_value()
+                                             ? int_builder.Append(*maybe_value)
+                                             : int_builder.AppendNull();
+        EXPECT_TRUE(status.ok());
+    }
+    return finish_array(&int_builder);
+}
+
+std::shared_ptr<arrow::Array> uint32_array(const std::vector<uint32_t>& values) {
+    arrow::UInt32Builder uint_builder; // non-null UInt32 slots, appended in input order
+    for (size_t i = 0; i < values.size(); ++i) {
+        EXPECT_TRUE(uint_builder.Append(values[i]).ok());
+    }
+    return finish_array(&uint_builder);
+}
+
+std::shared_ptr<arrow::Array> string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder text_builder; // one non-null string slot per input element
+    for (auto it = values.cbegin(); it != values.cend(); ++it) {
+        EXPECT_TRUE(text_builder.Append(*it).ok());
+    }
+    return finish_array(&text_builder);
+}
+
+std::shared_ptr<arrow::Array> timestamp_array(const std::vector<int64_t>& values) {
+    const auto utc_micros = arrow::timestamp(arrow::TimeUnit::MICRO, "UTC");
+    arrow::TimestampBuilder ts_builder(utc_micros, arrow::default_memory_pool());
+    for (const int64_t micros : values) {
+        EXPECT_TRUE(ts_builder.Append(micros).ok());
+    }
+    return finish_array(&ts_builder);
+}
+
+std::unique_ptr<::parquet::ParquetFileReader> make_reader(
+        const std::shared_ptr<arrow::Table>& table, int64_t row_group_size, bool enable_dictionary,
+        bool enable_statistics) {
+    // Writes `table` as uncompressed Parquet 2.6 into memory, then reopens those bytes.
+    auto sink_result = arrow::io::BufferOutputStream::Create();
+    EXPECT_TRUE(sink_result.ok());
+    auto sink = *sink_result;
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6)
+            ->compression(::parquet::Compression::UNCOMPRESSED);
+    if (enable_dictionary) {
+        props.enable_dictionary();
+    } else {
+        props.disable_dictionary();
+    }
+    if (!enable_statistics) {
+        props.disable_statistics();
+    }
+    const auto write_status = ::parquet::arrow::WriteTable(
+            *table, arrow::default_memory_pool(), sink, row_group_size, props.build());
+    EXPECT_TRUE(write_status.ok());
+    auto encoded = sink->Finish();
+    EXPECT_TRUE(encoded.ok());
+    auto input = std::make_shared<arrow::io::BufferReader>(*encoded);
+    return ::parquet::ParquetFileReader::Open(input);
+}
+
+std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> build_file_schema(
+        const ::parquet::ParquetFileReader& reader) {
+    // Derives ParquetColumnSchema entries from the reader's Parquet schema descriptor.
+    std::vector<std::unique_ptr<format::parquet::ParquetColumnSchema>> columns;
+    const auto metadata = reader.metadata();
+    EXPECT_TRUE(format::parquet::build_parquet_column_schema(*metadata->schema(), &columns).ok());
+    return columns;
+}
+
+format::FileScanRequest request_with_filter(format::FileColumnPredicateFilter filter) {
+    format::FileScanRequest scan_request; // default request plus `filter`
+    scan_request.column_predicate_filters.emplace_back(std::move(filter));
+    return scan_request;
+}
+
+::parquet::BlockSplitBloomFilter bloom_filter_for_int32_values(const std::vector<int32_t>& values) {
+    ::parquet::BlockSplitBloomFilter filter; // minimum-size filter holding hashes of `values`
+    filter.Init(::parquet::BlockSplitBloomFilter::kMinimumBloomFilterBytes);
+    for (const int32_t item : values) {
+        filter.InsertHash(filter.Hash(item));
+    }
+    return filter;
+}
+
+TEST(ParquetStatisticsTransformTest, ConvertsMinMaxNullCountUnsignedStringAndTimestamp) {
+    auto input = arrow::Table::Make(
+            arrow::schema({
+                    arrow::field("i", arrow::int32(), true),
+                    arrow::field("u", arrow::uint32(), false),
+                    arrow::field("s", arrow::utf8(), false),
+                    arrow::field("ts", arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"), false),
+            }),
+            {int32_array({1, std::nullopt, 5}), uint32_array({7, 9, 11}),
+             string_array({"alpha", "beta", "omega"}), timestamp_array({1000, 2000, 3000})});
+    auto file_reader = make_reader(input, 3, false, true);
+    auto columns = build_file_schema(*file_reader);
+    auto group_meta = file_reader->metadata()->RowGroup(0);
+    // Nullable int32: bounds 1..5 plus both the null and the non-null presence flags.
+    const auto i_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *columns[0], group_meta->ColumnChunk(0)->statistics());
+    EXPECT_TRUE(i_stats.has_min_max);
+    EXPECT_TRUE(i_stats.has_null_count);
+    EXPECT_TRUE(i_stats.has_null);
+    EXPECT_TRUE(i_stats.has_not_null);
+    EXPECT_EQ(i_stats.min_value.get<TYPE_INT>(), 1);
+    EXPECT_EQ(i_stats.max_value.get<TYPE_INT>(), 5);
+    // uint32: bounds are read back as BIGINT fields.
+    const auto u_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *columns[1], group_meta->ColumnChunk(1)->statistics());
+    EXPECT_TRUE(u_stats.has_min_max);
+    EXPECT_EQ(u_stats.min_value.get<TYPE_BIGINT>(), 7);
+    EXPECT_EQ(u_stats.max_value.get<TYPE_BIGINT>(), 11);
+    // utf8: bounds are read back as strings.
+    const auto s_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *columns[2], group_meta->ColumnChunk(2)->statistics());
+    EXPECT_TRUE(s_stats.has_min_max);
+    EXPECT_EQ(s_stats.min_value.get<TYPE_STRING>(), "alpha");
+    EXPECT_EQ(s_stats.max_value.get<TYPE_STRING>(), "omega");
+    // UTC timestamp: converted with an explicit time zone; only type and ordering are checked.
+    auto utc_zone = cctz::utc_time_zone();
+    const auto ts_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *columns[3], group_meta->ColumnChunk(3)->statistics(), &utc_zone);
+    EXPECT_TRUE(ts_stats.has_min_max);
+    EXPECT_EQ(ts_stats.min_value.get_type(), TYPE_DATETIMEV2);
+    EXPECT_EQ(ts_stats.max_value.get_type(), TYPE_DATETIMEV2);
+    EXPECT_LT(ts_stats.min_value, ts_stats.max_value);
+}
+
+TEST(ParquetStatisticsTransformTest, HandlesMissingStatisticsAndAllNullChunks) {
+    const auto nullable_int = arrow::schema({arrow::field("i", arrow::int32(), true)});
+    // Statistics disabled at write time: no min/max may be reported.
+    auto plain_table = arrow::Table::Make(nullable_int, {int32_array({1, 2, 3})});
+    auto plain_reader = make_reader(plain_table, 3, false, false);
+    auto plain_schema = build_file_schema(*plain_reader);
+    auto plain_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *plain_schema[0],
+            plain_reader->metadata()->RowGroup(0)->ColumnChunk(0)->statistics());
+    EXPECT_FALSE(plain_stats.has_min_max);
+    // Statistics enabled but every value null: a null count without min/max bounds.
+    auto null_table =
+            arrow::Table::Make(nullable_int, {int32_array({std::nullopt, std::nullopt})});
+    auto null_reader = make_reader(null_table, 2, false, true);
+    auto null_schema = build_file_schema(*null_reader);
+    auto null_stats = format::parquet::ParquetStatisticsUtils::TransformColumnStatistics(
+            *null_schema[0],
+            null_reader->metadata()->RowGroup(0)->ColumnChunk(0)->statistics());
+    EXPECT_TRUE(null_stats.has_null_count);
+    EXPECT_TRUE(null_stats.has_null);
+    EXPECT_FALSE(null_stats.has_not_null);
+    EXPECT_FALSE(null_stats.has_min_max);
+}
+
+TEST(ParquetStatisticsPruningTest, StatisticsPredicatesAndNullPredicatesPruneRowGroups) {
+    auto input = arrow::Table::Make(arrow::schema({arrow::field("i", arrow::int32(), true)}),
+                                    {int32_array({std::nullopt, std::nullopt, 3, 4, 5, 6})});
+    auto file_reader = make_reader(input, 2, false, true);
+    auto columns = build_file_schema(*file_reader);
+    // Row group size 2 splits the six rows into {null, null}, {3, 4} and {5, 6}.
+    // i >= 5: statistics-based pruning must drop the first two groups.
+    format::FileColumnPredicateFilter at_least_five;
+    at_least_five.file_column_id = format::LocalColumnId(0);
+    at_least_five.predicates.push_back(create_comparison_predicate<PredicateType::GE>(
+            0, "i", columns[0]->type, Field::create_field<TYPE_INT>(5), false));
+    std::vector<int> kept;
+    format::parquet::ParquetPruningStats counters;
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *file_reader->metadata(), file_reader.get(), columns,
+                        request_with_filter(at_least_five), nullptr, &kept, false, &counters)
+                        .ok());
+    EXPECT_EQ(kept, std::vector<int>({2}));
+    EXPECT_EQ(counters.filtered_row_groups_by_statistics, 2);
+
+    // IS NOT NULL: only the all-null first group is dropped.
+    format::FileColumnPredicateFilter not_null;
+    not_null.file_column_id = format::LocalColumnId(0);
+    not_null.predicates.push_back(std::make_shared<NullPredicate>(0, "i", false, TYPE_INT));
+    kept.clear();
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *file_reader->metadata(), file_reader.get(), columns,
+                        request_with_filter(not_null), nullptr, &kept, false, &counters)
+                        .ok());
+    EXPECT_EQ(kept, std::vector<int>({1, 2}));
+
+    // IS NULL: only the all-null first group survives.
+    format::FileColumnPredicateFilter only_null;
+    only_null.file_column_id = format::LocalColumnId(0);
+    only_null.predicates.push_back(std::make_shared<NullPredicate>(0, "i", true, TYPE_INT));
+    kept.clear();
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *file_reader->metadata(), file_reader.get(), columns,
+                        request_with_filter(only_null), nullptr, &kept, false, &counters)
+                        .ok());
+    EXPECT_EQ(kept, std::vector<int>({0}));
+}
+
+TEST(ParquetStatisticsPruningTest, DictionaryPruningHandlesExcludeIncludeAndUnsupportedPaths) {
+    auto input = arrow::Table::Make(arrow::schema({arrow::field("s", arrow::utf8(), false)}),
+                                    {string_array({"alpha", "beta", "gamma", "omega"})});
+    auto dict_reader = make_reader(input, 2, true, false);
+    auto dict_schema = build_file_schema(*dict_reader);
+    // Dictionary encoding on, statistics off; row group size 2 gives {alpha, beta} and
+    // {gamma, omega}. A value found in neither group must remove both of them.
+    format::FileColumnPredicateFilter eq_missing;
+    eq_missing.file_column_id = format::LocalColumnId(0);
+    eq_missing.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            0, "s", dict_schema[0]->type, Field::create_field<TYPE_STRING>("missing"), false));
+    std::vector<int> kept;
+    format::parquet::ParquetPruningStats counters;
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *dict_reader->metadata(), dict_reader.get(), dict_schema,
+                        request_with_filter(eq_missing), nullptr, &kept, false, &counters)
+                        .ok());
+    EXPECT_TRUE(kept.empty());
+    EXPECT_EQ(counters.filtered_row_groups_by_dictionary, 2);
+
+    // "gamma" occurs only in the second row group, so only the first one is pruned.
+    format::FileColumnPredicateFilter eq_gamma;
+    eq_gamma.file_column_id = format::LocalColumnId(0);
+    eq_gamma.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            0, "s", dict_schema[0]->type, Field::create_field<TYPE_STRING>("gamma"), false));
+    kept.clear();
+    counters = {};
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *dict_reader->metadata(), dict_reader.get(), dict_schema,
+                        request_with_filter(eq_gamma), nullptr, &kept, false, &counters)
+                        .ok());
+    EXPECT_EQ(kept, std::vector<int>({1}));
+    EXPECT_EQ(counters.filtered_row_groups_by_dictionary, 1);
+
+    // Same data written without dictionary encoding: nothing can be pruned by dictionary.
+    auto no_dict_reader = make_reader(input, 2, false, false);
+    auto no_dict_schema = build_file_schema(*no_dict_reader);
+    kept.clear();
+    counters = {};
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *no_dict_reader->metadata(), no_dict_reader.get(), no_dict_schema,
+                        request_with_filter(eq_missing), nullptr, &kept, false, &counters)
+                        .ok());
+    EXPECT_EQ(kept, std::vector<int>({0, 1}));
+    EXPECT_EQ(counters.filtered_row_groups_by_dictionary, 0);
+}
+
+TEST(ParquetStatisticsPruningTest, StatisticsRunsBeforeDictionaryAndMissingBloomKeepsRows) {
+    auto input = arrow::Table::Make(arrow::schema({arrow::field("s", arrow::utf8(), false)}),
+                                    {string_array({"alpha", "beta", "gamma", "omega"})});
+    auto full_reader = make_reader(input, 2, true, true);
+    auto full_schema = build_file_schema(*full_reader);
+    // Dictionary and statistics are both written. s > "zzzz" lies beyond every maximum, so the
+    // statistics counter must account for both row groups and the other counters for none.
+    format::FileColumnPredicateFilter gt_filter;
+    gt_filter.file_column_id = format::LocalColumnId(0);
+    gt_filter.predicates.push_back(create_comparison_predicate<PredicateType::GT>(
+            0, "s", full_schema[0]->type, Field::create_field<TYPE_STRING>("zzzz"), false));
+    std::vector<int> kept;
+    format::parquet::ParquetPruningStats counters;
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *full_reader->metadata(), full_reader.get(), full_schema,
+                        request_with_filter(gt_filter), nullptr, &kept, true, &counters)
+                        .ok());
+    EXPECT_TRUE(kept.empty());
+    EXPECT_EQ(counters.filtered_row_groups_by_statistics, 2);
+    EXPECT_EQ(counters.filtered_row_groups_by_dictionary, 0);
+    EXPECT_EQ(counters.filtered_row_groups_by_bloom_filter, 0);
+
+    // Written without dictionary or statistics: both row groups stay for the "absent" lookup.
+    auto bare_reader = make_reader(input, 2, false, false);
+    auto bare_schema = build_file_schema(*bare_reader);
+    format::FileColumnPredicateFilter eq_absent;
+    eq_absent.file_column_id = format::LocalColumnId(0);
+    eq_absent.predicates.push_back(create_comparison_predicate<PredicateType::EQ>(
+            0, "s", bare_schema[0]->type, Field::create_field<TYPE_STRING>("absent"), false));
+    kept.clear();
+    counters = {};
+    ASSERT_TRUE(format::parquet::select_row_groups_by_statistics(
+                        *bare_reader->metadata(), bare_reader.get(), bare_schema,
+                        request_with_filter(eq_absent), nullptr, &kept, true, &counters)
+                        .ok());
+    EXPECT_EQ(kept, std::vector<int>({0, 1}));
+    EXPECT_EQ(counters.filtered_row_groups_by_bloom_filter, 0);
+}
+
+::parquet::BlockSplitBloomFilter bloom_filter_for_string_values(
+        const std::vector<std::string>& values) {
+    ::parquet::BlockSplitBloomFilter filter; // minimum-size filter over the bytes of `values`
+    filter.Init(::parquet::BlockSplitBloomFilter::kMinimumBloomFilterBytes);
+    for (const std::string& text : values) {
+        const auto* bytes = reinterpret_cast<const uint8_t*>(text.data());
+        const ::parquet::ByteArray view(static_cast<uint32_t>(text.size()), bytes);
+        filter.InsertHash(filter.Hash(&view));
+    }
+    return filter;
+}
+
+TEST(ParquetBloomFilterPruningTest, EqPredicateUsesArrowHashAndPrunesAbsentIntValue) {
+    using StatsUtils = format::parquet::ParquetStatisticsUtils;
+    auto int_schema = primitive_bloom_schema(std::make_shared<DataTypeInt32>());
+    // Only the hashes of 1 and 3 are inserted into the filter.
+    auto bloom = bloom_filter_for_int32_values({1, 3});
+    auto eq_two = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", int_schema.type, Field::create_field<TYPE_INT>(2), false));
+    auto eq_three = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", int_schema.type, Field::create_field<TYPE_INT>(3), false));
+
+    // c0 = 2 must be excluded; c0 = 3 must not be.
+    EXPECT_TRUE(StatsUtils::BloomFilterExcludes(int_schema, eq_two, bloom));
+    EXPECT_FALSE(StatsUtils::BloomFilterExcludes(int_schema, eq_three, bloom));
+}
+
+TEST(ParquetBloomFilterPruningTest, InPredicatePrunesOnlyWhenAllValuesAreAbsent) {
+    auto column = primitive_bloom_schema(std::make_shared<DataTypeInt32>());
+    auto int32_filter = bloom_filter_for_int32_values({1, 3});
+    // IN (2, 4): neither value is among the {1, 3} handed to the filter helper.
+    auto unseen_set = build_set<TYPE_INT>();
+    int32_t unseen_first = 2;
+    int32_t unseen_second = 4;
+    unseen_set->insert(&unseen_first);
+    unseen_set->insert(&unseen_second);
+    auto unseen_filter =
+            bloom_filter_with_predicate(create_in_list_predicate<PredicateType::IN_LIST>(
+                    0, "c0", column.type, unseen_set, false));
+    // IN (2, 3): 3 is among the filter values, so the row group has to be kept.
+    auto partial_set = build_set<TYPE_INT>();
+    int32_t partial_first = 2;
+    int32_t partial_second = 3;
+    partial_set->insert(&partial_first);
+    partial_set->insert(&partial_second);
+    auto partial_filter =
+            bloom_filter_with_predicate(create_in_list_predicate<PredicateType::IN_LIST>(
+                    0, "c0", column.type, partial_set, false));
+
+    EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(column, unseen_filter,
+                                                                             int32_filter));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            column, partial_filter, int32_filter));
+}
+
+TEST(ParquetBloomFilterPruningTest, BooleanPredicateHashesAsParquetInt32) {
+    auto column = primitive_bloom_schema(std::make_shared<DataTypeBool>());
+    auto int32_filter = bloom_filter_for_int32_values({1}); // built from the INT32 value 1 only
+    auto false_lookup = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", column.type, Field::create_field<TYPE_BOOLEAN>(false), false));
+    auto true_lookup = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", column.type, Field::create_field<TYPE_BOOLEAN>(true), false));
+    // EQ false is expected to be excluded, while EQ true must keep the row group.
+    EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(column, false_lookup,
+                                                                             int32_filter));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(column, true_lookup,
+                                                                              int32_filter));
+}
+
+TEST(ParquetBloomFilterPruningTest, StringPredicateUsesArrowByteArrayHash) {
+    auto column = primitive_bloom_schema(std::make_shared<DataTypeString>());
+    auto bytes_filter = bloom_filter_for_string_values({"alpha", "omega"}); // no "beta"
+    auto unseen_filter = bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", column.type, Field::create_field<TYPE_STRING>("beta"), false));
+    auto matched_filter =
+            bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+                    0, "c0", column.type, Field::create_field<TYPE_STRING>("alpha"), false));
+    // EQ "beta" is expected to be excluded; EQ "alpha" must keep the row group.
+    EXPECT_TRUE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(column, unseen_filter,
+                                                                             bytes_filter));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            column, matched_filter, bytes_filter));
+}
+
+TEST(ParquetBloomFilterPruningTest, NullableAcceptingAndUnsupportedPredicatesKeepRowGroup) {
+    auto column = primitive_bloom_schema(std::make_shared<DataTypeInt32>());
+    auto int32_filter = bloom_filter_for_int32_values({1}); // 2 is not among the filter values
+    auto target_predicate = create_comparison_predicate<PredicateType::EQ>(
+            0, "c0", column.type, Field::create_field<TYPE_INT>(2), false);
+    auto keeps_nulls_filter =
+            bloom_filter_with_predicate(std::make_shared<AcceptNullPredicate>(target_predicate));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            column, keeps_nulls_filter, int32_filter));
+    // Second scenario: an EQ predicate on an INT16 column must not exclude the row group either.
+    auto int16_bloom_column = primitive_bloom_schema(std::make_shared<DataTypeInt16>());
+    auto int16_equal_filter =
+            bloom_filter_with_predicate(create_comparison_predicate<PredicateType::EQ>(
+                    0, "c0", int16_bloom_column.type, Field::create_field<TYPE_SMALLINT>(2),
+                    false));
+    EXPECT_FALSE(format::parquet::ParquetStatisticsUtils::BloomFilterExcludes(
+            int16_bloom_column, int16_equal_filter, int32_filter));
+}
+
+} // namespace
+} // namespace doris
diff --git a/be/test/format_v2/parquet/parquet_type_test.cpp b/be/test/format_v2/parquet/parquet_type_test.cpp
new file mode 100644
index 00000000000000..4bca77c1803b49
--- /dev/null
+++ b/be/test/format_v2/parquet/parquet_type_test.cpp
@@ -0,0 +1,494 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/parquet/parquet_type.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/schema.h>
+#include <parquet/arrow/writer.h>
+#include <parquet/file_reader.h>
+
+#include <vector>
+
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/primitive_type.h"
+
+namespace doris::format::parquet {
+namespace {
+
+::parquet::SchemaDescriptor make_descriptor(const ::parquet::schema::NodePtr& node) {
+    // Wrap the single field in a REQUIRED root group named "schema" and describe that group.
+    ::parquet::SchemaDescriptor wrapped;
+    wrapped.Init(::parquet::schema::GroupNode::Make("schema", ::parquet::Repetition::REQUIRED,
+                                                    {node}));
+    return wrapped;
+}
+
+ParquetTypeDescriptor resolve_node(const ::parquet::schema::NodePtr& node) {
+    // The temporary descriptor stays alive until resolve_parquet_type() has returned.
+    return resolve_parquet_type(make_descriptor(node).Column(0));
+}
+
+PrimitiveType primitive_type(const DataTypePtr& type) { // primitive id, nullability ignored
+    return (*remove_nullable(type)).get_primitive_type();
+}
+
+int scale_of(const DataTypePtr& type) { // scale of the type left after remove_nullable()
+    return (*remove_nullable(type)).get_scale();
+}
+
+std::shared_ptr<arrow::Array> make_float16_array() {
+    std::shared_ptr<arrow::Array> half_floats;
+    arrow::HalfFloatBuilder half_builder;
+    EXPECT_TRUE(half_builder.Append(0x3E00).ok()); // the array holds exactly this one element
+    EXPECT_TRUE(half_builder.Finish(&half_floats).ok());
+    return half_floats;
+}
+
+ParquetTypeDescriptor resolve_arrow_float16_type() {
+    // Round-trip a one-column float16 Arrow table through an in-memory Parquet file.
+    const auto arrow_schema = arrow::schema({arrow::field("f16", arrow::float16(), true)});
+    const auto table = arrow::Table::Make(arrow_schema, {make_float16_array()});
+    auto sink_result = arrow::io::BufferOutputStream::Create();
+    EXPECT_TRUE(sink_result.ok());
+    auto sink = *sink_result;
+    EXPECT_TRUE(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), sink, 1).ok());
+    auto file_bytes = sink->Finish();
+    EXPECT_TRUE(file_bytes.ok());
+    auto source = std::make_shared<arrow::io::BufferReader>(*file_bytes);
+    auto reader = ::parquet::ParquetFileReader::Open(source);
+    return resolve_parquet_type(reader->metadata()->schema()->Column(0));
+}
+
+} // namespace
+
+TEST(ParquetTypeTest, ResolveLogicalIntegerMappings) {
+    struct Case { // one LogicalType::Int(bit_width, is_signed) input and its expected mapping
+        int bit_width;
+        bool is_signed;
+        PrimitiveType expected_type;
+        bool expected_unsigned;
+    };
+    const std::vector<Case> cases = {
+            {8, true, TYPE_TINYINT, false},   {8, false, TYPE_SMALLINT, true},
+            {16, true, TYPE_SMALLINT, false}, {16, false, TYPE_INT, true},
+            {32, true, TYPE_INT, false},      {32, false, TYPE_BIGINT, true},
+            {64, true, TYPE_BIGINT, false},   {64, false, TYPE_LARGEINT, true},
+    };
+    // Widths repeat (signed/unsigned), so the trace uses the unique case index.
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(::testing::Message() << "case #" << (&test_case - cases.data()));
+        const auto node = ::parquet::schema::PrimitiveNode::Make(
+                "c", ::parquet::Repetition::REQUIRED,
+                ::parquet::LogicalType::Int(test_case.bit_width, test_case.is_signed),
+                test_case.bit_width == 64 ? ::parquet::Type::INT64 : ::parquet::Type::INT32);
+        const auto type = resolve_node(node);
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type);
+        EXPECT_EQ(type.integer_bit_width, test_case.bit_width);
+        EXPECT_EQ(type.is_unsigned_integer, test_case.expected_unsigned);
+        EXPECT_TRUE(type.supports_record_reader);
+    }
+}
+
+TEST(ParquetTypeTest, ResolveLogicalTimeAndTimestampMappings) { // logical TIME / TIMESTAMP
+    const auto time_millis = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_ms", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::MILLIS),
+            ::parquet::Type::INT32));
+    ASSERT_NE(time_millis.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(time_millis.doris_type), TYPE_TIMEV2);
+    EXPECT_EQ(time_millis.time_unit, ParquetTimeUnit::MILLIS);
+    EXPECT_EQ(time_millis.extra_type_info, ParquetExtraTypeInfo::UNIT_MS);
+    // Non-UTC-adjusted TIME(MICROS) on INT64 is likewise expected to map to TYPE_TIMEV2.
+    const auto time_micros = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_us", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::MICROS),
+            ::parquet::Type::INT64));
+    ASSERT_NE(time_micros.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(time_micros.doris_type), TYPE_TIMEV2);
+    EXPECT_EQ(time_micros.time_unit, ParquetTimeUnit::MICROS);
+    EXPECT_EQ(time_micros.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS);
+    // UTC-adjusted TIME is expected to be rejected: no Doris type and a non-empty reason.
+    const auto adjusted_time = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_adjusted", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::MILLIS),
+            ::parquet::Type::INT32));
+    EXPECT_EQ(adjusted_time.doris_type, nullptr);
+    EXPECT_FALSE(adjusted_time.supports_record_reader);
+    EXPECT_FALSE(adjusted_time.unsupported_reason.empty());
+    // OPTIONAL UTC-adjusted TIMESTAMP(NANOS): nullable TYPE_DATETIMEV2 carrying NANOS unit info.
+    const auto timestamp_nanos = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "ts_ns", ::parquet::Repetition::OPTIONAL,
+            ::parquet::LogicalType::Timestamp(true, ::parquet::LogicalType::TimeUnit::NANOS),
+            ::parquet::Type::INT64));
+    ASSERT_NE(timestamp_nanos.doris_type, nullptr);
+    EXPECT_TRUE(timestamp_nanos.doris_type->is_nullable());
+    EXPECT_EQ(primitive_type(timestamp_nanos.doris_type), TYPE_DATETIMEV2);
+    EXPECT_TRUE(timestamp_nanos.is_timestamp);
+    EXPECT_TRUE(timestamp_nanos.timestamp_is_adjusted_to_utc);
+    EXPECT_EQ(timestamp_nanos.time_unit, ParquetTimeUnit::NANOS);
+    EXPECT_EQ(timestamp_nanos.extra_type_info, ParquetExtraTypeInfo::UNIT_NS);
+}
+
+TEST(ParquetTypeTest, ResolveLogicalTimestampMatrix) {
+    struct Case { // one Timestamp(adjusted_to_utc, unit) input and the expected resolution
+        ::parquet::LogicalType::TimeUnit::unit parquet_unit;
+        bool adjusted_to_utc;
+        ParquetTimeUnit expected_unit;
+        ParquetExtraTypeInfo expected_extra;
+        int expected_scale;
+    };
+    const std::vector<Case> cases = {
+            {::parquet::LogicalType::TimeUnit::MILLIS, true, ParquetTimeUnit::MILLIS,
+             ParquetExtraTypeInfo::UNIT_MS, 3},
+            {::parquet::LogicalType::TimeUnit::MILLIS, false, ParquetTimeUnit::MILLIS,
+             ParquetExtraTypeInfo::UNIT_MS, 3},
+            {::parquet::LogicalType::TimeUnit::MICROS, true, ParquetTimeUnit::MICROS,
+             ParquetExtraTypeInfo::UNIT_MICROS, 6},
+            {::parquet::LogicalType::TimeUnit::MICROS, false, ParquetTimeUnit::MICROS,
+             ParquetExtraTypeInfo::UNIT_MICROS, 6},
+            {::parquet::LogicalType::TimeUnit::NANOS, true, ParquetTimeUnit::NANOS,
+             ParquetExtraTypeInfo::UNIT_NS, 6},
+            {::parquet::LogicalType::TimeUnit::NANOS, false, ParquetTimeUnit::NANOS,
+             ParquetExtraTypeInfo::UNIT_NS, 6},
+    };
+    // expected_scale repeats across cases, so the trace uses the unique case index.
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(::testing::Message() << "case #" << (&test_case - cases.data()));
+        const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+                "ts", ::parquet::Repetition::OPTIONAL,
+                ::parquet::LogicalType::Timestamp(test_case.adjusted_to_utc,
+                                                  test_case.parquet_unit),
+                ::parquet::Type::INT64));
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_TRUE(type.doris_type->is_nullable());
+        EXPECT_EQ(primitive_type(type.doris_type), TYPE_DATETIMEV2);
+        EXPECT_EQ(scale_of(type.doris_type), test_case.expected_scale);
+        EXPECT_TRUE(type.is_timestamp);
+        EXPECT_EQ(type.timestamp_is_adjusted_to_utc, test_case.adjusted_to_utc);
+        EXPECT_EQ(type.time_unit, test_case.expected_unit);
+        EXPECT_EQ(type.extra_type_info, test_case.expected_extra);
+    }
+}
+
+TEST(ParquetTypeTest, ConvertedTimeIsRejectedButConvertedTimestampIsSupported) { // ConvertedType
+    const auto converted_time = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32,
+            ::parquet::ConvertedType::TIME_MILLIS));
+    EXPECT_EQ(converted_time.doris_type, nullptr);
+    EXPECT_FALSE(converted_time.supports_record_reader);
+    EXPECT_FALSE(converted_time.unsupported_reason.empty());
+    // TIMESTAMP_MILLIS is accepted and reported as a UTC-adjusted DATETIMEV2 in MILLIS.
+    const auto converted_timestamp = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "ts_ms", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT64,
+            ::parquet::ConvertedType::TIMESTAMP_MILLIS));
+    ASSERT_NE(converted_timestamp.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(converted_timestamp.doris_type), TYPE_DATETIMEV2);
+    EXPECT_TRUE(converted_timestamp.is_timestamp);
+    EXPECT_TRUE(converted_timestamp.timestamp_is_adjusted_to_utc);
+    EXPECT_EQ(converted_timestamp.time_unit, ParquetTimeUnit::MILLIS);
+    // TIMESTAMP_MICROS on an OPTIONAL column: nullable DATETIMEV2, scale 6, UTC-adjusted.
+    const auto converted_timestamp_micros = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "ts_us", ::parquet::Repetition::OPTIONAL, ::parquet::Type::INT64,
+            ::parquet::ConvertedType::TIMESTAMP_MICROS));
+    ASSERT_NE(converted_timestamp_micros.doris_type, nullptr);
+    EXPECT_TRUE(converted_timestamp_micros.doris_type->is_nullable());
+    EXPECT_EQ(primitive_type(converted_timestamp_micros.doris_type), TYPE_DATETIMEV2);
+    EXPECT_EQ(scale_of(converted_timestamp_micros.doris_type), 6);
+    EXPECT_TRUE(converted_timestamp_micros.is_timestamp);
+    EXPECT_TRUE(converted_timestamp_micros.timestamp_is_adjusted_to_utc);
+    EXPECT_EQ(converted_timestamp_micros.time_unit, ParquetTimeUnit::MICROS);
+    EXPECT_EQ(converted_timestamp_micros.extra_type_info, ParquetExtraTypeInfo::UNIT_MICROS);
+}
+
+TEST(ParquetTypeTest, ResolveConvertedIntegerMappingsAndDecodedKinds) {
+    struct Case { // one ConvertedType integer annotation and the mapping expected for it
+        ::parquet::ConvertedType::type converted_type;
+        ::parquet::Type::type physical_type;
+        PrimitiveType expected_type;
+        int bit_width;
+        bool expected_unsigned;
+        DecodedValueKind expected_value_kind;
+    };
+    const std::vector<Case> cases = {
+            {::parquet::ConvertedType::INT_8, ::parquet::Type::INT32, TYPE_TINYINT, 8, false,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::UINT_8, ::parquet::Type::INT32, TYPE_SMALLINT, 8, true,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::INT_16, ::parquet::Type::INT32, TYPE_SMALLINT, 16, false,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::UINT_16, ::parquet::Type::INT32, TYPE_INT, 16, true,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::INT_32, ::parquet::Type::INT32, TYPE_INT, 32, false,
+             DecodedValueKind::INT32},
+            {::parquet::ConvertedType::UINT_32, ::parquet::Type::INT32, TYPE_BIGINT, 32, true,
+             DecodedValueKind::UINT32},
+            {::parquet::ConvertedType::INT_64, ::parquet::Type::INT64, TYPE_BIGINT, 64, false,
+             DecodedValueKind::INT64},
+            {::parquet::ConvertedType::UINT_64, ::parquet::Type::INT64, TYPE_LARGEINT, 64, true,
+             DecodedValueKind::UINT64},
+    };
+    // Every case is a REQUIRED column; decoded_value_kind() is checked on the resolved type.
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(test_case.converted_type);
+        const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+                "c", ::parquet::Repetition::REQUIRED, test_case.physical_type,
+                test_case.converted_type));
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type);
+        EXPECT_EQ(type.integer_bit_width, test_case.bit_width);
+        EXPECT_EQ(type.is_unsigned_integer, test_case.expected_unsigned);
+        EXPECT_EQ(decoded_value_kind(type), test_case.expected_value_kind);
+    }
+}
+
+TEST(ParquetTypeTest, ResolveConvertedDecimalCarriers) {
+    struct Case { // one ConvertedType::DECIMAL column layout and the expected Doris carrier
+        ::parquet::Type::type physical_type;
+        int type_length;
+        int precision;
+        int scale;
+        PrimitiveType expected_type;
+        ParquetExtraTypeInfo expected_extra;
+    };
+    const std::vector<Case> cases = {
+            {::parquet::Type::INT32, -1, 9, 2, TYPE_DECIMAL32, ParquetExtraTypeInfo::DECIMAL_INT32},
+            {::parquet::Type::INT64, -1, 18, 6, TYPE_DECIMAL64,
+             ParquetExtraTypeInfo::DECIMAL_INT64},
+            {::parquet::Type::BYTE_ARRAY, -1, 20, 5, TYPE_DECIMAL128I,
+             ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY},
+            {::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16, 38, 6, TYPE_DECIMAL128I,
+             ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY},
+            {::parquet::Type::FIXED_LEN_BYTE_ARRAY, 20, 39, 6, TYPE_DECIMAL256,
+             ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY},
+    };
+    // FIXED_LEN_BYTE_ARRAY appears twice, so the trace uses the unique case index.
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(::testing::Message() << "case #" << (&test_case - cases.data()));
+        const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+                "d", ::parquet::Repetition::REQUIRED, test_case.physical_type,
+                ::parquet::ConvertedType::DECIMAL, test_case.type_length, test_case.precision,
+                test_case.scale));
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type);
+        EXPECT_TRUE(type.is_decimal);
+        EXPECT_FALSE(type.is_string_like);
+        EXPECT_EQ(type.decimal_precision, test_case.precision);
+        EXPECT_EQ(type.decimal_scale, test_case.scale);
+        EXPECT_EQ(type.extra_type_info, test_case.expected_extra);
+    }
+}
+
+TEST(ParquetTypeTest, ResolveLogicalStringDateAndDecimalMappings) { // LogicalType annotations
+    const std::vector<std::shared_ptr<const ::parquet::LogicalType>> string_like_logical_types = {
+            ::parquet::LogicalType::String(), ::parquet::LogicalType::Enum(),
+            ::parquet::LogicalType::JSON(), ::parquet::LogicalType::BSON()};
+    for (const auto& logical_type : string_like_logical_types) { // all on OPTIONAL BYTE_ARRAY
+        const auto type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+                "s", ::parquet::Repetition::OPTIONAL, logical_type, ::parquet::Type::BYTE_ARRAY));
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_TRUE(type.doris_type->is_nullable());
+        EXPECT_EQ(primitive_type(type.doris_type), TYPE_STRING);
+        EXPECT_TRUE(type.is_string_like);
+    }
+    // UUID on FIXED_LEN_BYTE_ARRAY(16) is also expected to surface as a string-like STRING.
+    const auto uuid = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "uuid", ::parquet::Repetition::OPTIONAL, ::parquet::LogicalType::UUID(),
+            ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16));
+    ASSERT_NE(uuid.doris_type, nullptr);
+    EXPECT_TRUE(uuid.doris_type->is_nullable());
+    EXPECT_EQ(primitive_type(uuid.doris_type), TYPE_STRING);
+    EXPECT_TRUE(uuid.is_string_like);
+    // DATE on INT32 maps to TYPE_DATEV2.
+    const auto date = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Date(),
+            ::parquet::Type::INT32));
+    ASSERT_NE(date.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(date.doris_type), TYPE_DATEV2);
+    // Decimal(18, 6) carried by INT64 maps to DECIMAL64 with DECIMAL_INT64 extra info.
+    const auto decimal64 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d64", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(18, 6),
+            ::parquet::Type::INT64));
+    ASSERT_NE(decimal64.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(decimal64.doris_type), TYPE_DECIMAL64);
+    EXPECT_TRUE(decimal64.is_decimal);
+    EXPECT_EQ(decimal64.decimal_precision, 18);
+    EXPECT_EQ(decimal64.decimal_scale, 6);
+    EXPECT_EQ(decimal64.extra_type_info, ParquetExtraTypeInfo::DECIMAL_INT64);
+    // Decimal(38, 6) in a 16-byte FIXED_LEN_BYTE_ARRAY maps to DECIMAL128I.
+    const auto decimal128 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d128", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(38, 6),
+            ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 16));
+    ASSERT_NE(decimal128.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(decimal128.doris_type), TYPE_DECIMAL128I);
+    EXPECT_TRUE(decimal128.is_decimal);
+    EXPECT_EQ(decimal128.decimal_precision, 38);
+    EXPECT_EQ(decimal128.decimal_scale, 6);
+    EXPECT_EQ(decimal128.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY);
+    // Decimal(39, 6) in a 20-byte FIXED_LEN_BYTE_ARRAY maps to DECIMAL256, not string-like.
+    const auto decimal256 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d256", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Decimal(39, 6),
+            ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 20));
+    ASSERT_NE(decimal256.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(decimal256.doris_type), TYPE_DECIMAL256);
+    EXPECT_TRUE(decimal256.is_decimal);
+    EXPECT_EQ(decimal256.decimal_precision, 39);
+    EXPECT_EQ(decimal256.decimal_scale, 6);
+    EXPECT_EQ(decimal256.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY);
+    EXPECT_FALSE(decimal256.is_string_like);
+}
+
+TEST(ParquetTypeTest, LogicalConvertedAndPhysicalFallbackLevelsAreDistinct) { // INT32, 3 paths
+    const auto logical_type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "c", ::parquet::Repetition::REQUIRED, ::parquet::LogicalType::Int(8, true),
+            ::parquet::Type::INT32));
+    ASSERT_NE(logical_type.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(logical_type.doris_type), TYPE_TINYINT);
+    EXPECT_EQ(logical_type.integer_bit_width, 8);
+    // The same annotation expressed as ConvertedType::INT_8 gives the same TINYINT / width 8.
+    const auto converted_type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "c", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32,
+            ::parquet::ConvertedType::INT_8));
+    ASSERT_NE(converted_type.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(converted_type.doris_type), TYPE_TINYINT);
+    EXPECT_EQ(converted_type.integer_bit_width, 8);
+    // With no annotation the physical INT32 maps to TYPE_INT and integer_bit_width stays -1.
+    const auto physical_type = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "c", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT32));
+    ASSERT_NE(physical_type.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(physical_type.doris_type), TYPE_INT);
+    EXPECT_EQ(physical_type.integer_bit_width, -1);
+}
+
+TEST(ParquetTypeTest, ResolveDecimalStringLikeFloat16AndPhysicalFallback) { // mixed edge cases
+    const auto decimal256 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "d", ::parquet::Repetition::REQUIRED, ::parquet::Type::FIXED_LEN_BYTE_ARRAY,
+            ::parquet::ConvertedType::DECIMAL, 20, 39, 6));
+    ASSERT_NE(decimal256.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(decimal256.doris_type), TYPE_DECIMAL256);
+    EXPECT_TRUE(decimal256.is_decimal);
+    EXPECT_FALSE(decimal256.is_string_like);
+    EXPECT_EQ(decimal256.decimal_precision, 39);
+    EXPECT_EQ(decimal256.decimal_scale, 6);
+    EXPECT_EQ(decimal256.extra_type_info, ParquetExtraTypeInfo::DECIMAL_BYTE_ARRAY);
+    // Unannotated BYTE_ARRAY falls back to a string-like TYPE_STRING.
+    const auto plain_binary = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "s", ::parquet::Repetition::REQUIRED, ::parquet::Type::BYTE_ARRAY));
+    ASSERT_NE(plain_binary.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(plain_binary.doris_type), TYPE_STRING);
+    EXPECT_TRUE(plain_binary.is_string_like);
+    // Float16 read back from an Arrow-written file: FLBA(2) exposed as nullable TYPE_FLOAT.
+    const auto float16 = resolve_arrow_float16_type();
+    ASSERT_NE(float16.doris_type, nullptr);
+    EXPECT_TRUE(float16.doris_type->is_nullable());
+    EXPECT_EQ(float16.physical_type, ::parquet::Type::FIXED_LEN_BYTE_ARRAY);
+    EXPECT_EQ(float16.fixed_length, 2);
+    EXPECT_EQ(primitive_type(float16.doris_type), TYPE_FLOAT);
+    EXPECT_EQ(float16.extra_type_info, ParquetExtraTypeInfo::FLOAT16);
+    EXPECT_FALSE(float16.is_string_like);
+    EXPECT_EQ(decoded_value_kind(float16), DecodedValueKind::FIXED_BINARY);
+}
+
+TEST(ParquetTypeTest, ResolveNullDescriptorAndPhysicalFallback) { // nullptr input and INT96
+    const auto null_type = resolve_parquet_type(nullptr);
+    EXPECT_EQ(null_type.doris_type, nullptr);
+    EXPECT_EQ(null_type.physical_type, ::parquet::Type::UNDEFINED);
+    EXPECT_TRUE(null_type.supports_record_reader);
+    // Bare INT96 resolves to DATETIMEV2 with IMPALA_TIMESTAMP extra info and INT96 decoding.
+    const auto int96 = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "ts", ::parquet::Repetition::REQUIRED, ::parquet::Type::INT96));
+    ASSERT_NE(int96.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(int96.doris_type), TYPE_DATETIMEV2);
+    EXPECT_EQ(int96.extra_type_info, ParquetExtraTypeInfo::IMPALA_TIMESTAMP);
+    EXPECT_EQ(decoded_value_kind(int96), DecodedValueKind::INT96);
+}
+
+TEST(ParquetTypeTest, ResolveEveryPhysicalFallback) {
+    struct Case { // an unannotated physical column and the fallback expected for it
+        ::parquet::schema::NodePtr node;
+        PrimitiveType expected_type;
+        DecodedValueKind expected_kind;
+        bool expected_string_like = false;
+    };
+    const std::vector<Case> cases = {
+            {::parquet::schema::PrimitiveNode::Make("b", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::BOOLEAN),
+             TYPE_BOOLEAN, DecodedValueKind::BOOL},
+            {::parquet::schema::PrimitiveNode::Make("i32", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::INT32),
+             TYPE_INT, DecodedValueKind::INT32},
+            {::parquet::schema::PrimitiveNode::Make("i64", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::INT64),
+             TYPE_BIGINT, DecodedValueKind::INT64},
+            {::parquet::schema::PrimitiveNode::Make("f", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::FLOAT),
+             TYPE_FLOAT, DecodedValueKind::FLOAT},
+            {::parquet::schema::PrimitiveNode::Make("d", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::DOUBLE),
+             TYPE_DOUBLE, DecodedValueKind::DOUBLE},
+            {::parquet::schema::PrimitiveNode::Make("s", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::BYTE_ARRAY),
+             TYPE_STRING, DecodedValueKind::BINARY, true},
+            {::parquet::schema::PrimitiveNode::Make("fs", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::FIXED_LEN_BYTE_ARRAY,
+                                                    ::parquet::ConvertedType::NONE, 4),
+             TYPE_STRING, DecodedValueKind::FIXED_BINARY, true},
+            {::parquet::schema::PrimitiveNode::Make("ts", ::parquet::Repetition::REQUIRED,
+                                                    ::parquet::Type::INT96),
+             TYPE_DATETIMEV2, DecodedValueKind::INT96},
+    };
+    // TYPE_STRING is expected for two cases, so the trace uses the unique case index.
+    for (const auto& test_case : cases) {
+        SCOPED_TRACE(::testing::Message() << "case #" << (&test_case - cases.data()));
+        const auto type = resolve_node(test_case.node);
+        ASSERT_NE(type.doris_type, nullptr);
+        EXPECT_EQ(primitive_type(type.doris_type), test_case.expected_type);
+        EXPECT_EQ(decoded_value_kind(type), test_case.expected_kind);
+        EXPECT_EQ(type.is_string_like, test_case.expected_string_like);
+        EXPECT_TRUE(type.supports_record_reader);
+    }
+}
+
+TEST(ParquetTypeTest, InvalidLogicalAnnotationsFallBackOrRejectAsSpecified) { // odd inputs
+    EXPECT_THROW(::parquet::LogicalType::Int(24, true), ::parquet::ParquetException);
+    // Non-adjusted TIME(NANOS) is not an error here: it resolves to BIGINT with no reason set.
+    const auto nanos_time = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_ns", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(false, ::parquet::LogicalType::TimeUnit::NANOS),
+            ::parquet::Type::INT64));
+    ASSERT_NE(nanos_time.doris_type, nullptr);
+    EXPECT_EQ(primitive_type(nanos_time.doris_type), TYPE_BIGINT);
+    EXPECT_TRUE(nanos_time.unsupported_reason.empty());
+    // UTC-adjusted TIME(NANOS) is expected to be rejected with an unsupported_reason.
+    const auto adjusted_nanos_time = resolve_node(::parquet::schema::PrimitiveNode::Make(
+            "time_ns_utc", ::parquet::Repetition::REQUIRED,
+            ::parquet::LogicalType::Time(true, ::parquet::LogicalType::TimeUnit::NANOS),
+            ::parquet::Type::INT64));
+    EXPECT_EQ(adjusted_nanos_time.doris_type, nullptr);
+    EXPECT_FALSE(adjusted_nanos_time.supports_record_reader);
+    EXPECT_FALSE(adjusted_nanos_time.unsupported_reason.empty());
+    // PrimitiveNode::Make itself is expected to throw for Float16 on a 4-byte FLBA.
+    EXPECT_THROW(::parquet::schema::PrimitiveNode::Make("f16_bad", ::parquet::Repetition::REQUIRED,
+                                                        ::parquet::LogicalType::Float16(),
+                                                        ::parquet::Type::FIXED_LEN_BYTE_ARRAY, 4),
+                 ::parquet::ParquetException);
+}
+
+} // namespace doris::format::parquet
diff --git a/be/test/format_v2/table/hive_reader_test.cpp b/be/test/format_v2/table/hive_reader_test.cpp
new file mode 100644
index 00000000000000..67be16856f53eb
--- /dev/null
+++ b/be/test/format_v2/table/hive_reader_test.cpp
@@ -0,0 +1,151 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/hive_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format::hive {
+namespace {
+
+ColumnDefinition table_column(const std::string& name, DataTypePtr type) {
+    ColumnDefinition definition; // a table-side column identified by its own name
+    definition.name = name;
+    definition.type = std::move(type);
+    definition.identifier = Field::create_field<TYPE_STRING>(name);
+    return definition;
+}
+
+Status init_hive_reader(FileFormat format, TFileScanRangeParams* params, RuntimeState* state,
+                        RuntimeProfile* profile, HiveReader* reader) {
+    return reader->init({
+            .projected_columns = {table_column("id", std::make_shared<DataTypeInt32>()),
+                                  table_column("name", std::make_shared<DataTypeString>())},
+            .column_predicates = {},
+            .conjuncts = {}, // this helper pushes down no predicates and no conjuncts
+            .format = format,
+            .scan_params = params,
+            .io_ctx = nullptr, // no IO context is supplied by this helper
+            .runtime_state = state,
+            .scanner_profile = profile,
+    });
+}
+
+class HiveV2ReaderTest : public testing::Test {
+public:
+    HiveV2ReaderTest() : state(query_options, query_globals), profile("hive_v2_reader_test") {}
+
+protected:
+    TQueryOptions query_options;
+    TQueryGlobals query_globals;
+    RuntimeState state; // built from the two members above, so it must be declared after them
+    RuntimeProfile profile;
+};
+
+// Scenario: a Hive table backed by OpenCSVSerde is planned as table_format=hive with the CSV
+// file format. HiveReader has to accept that format so TableReader can build the v2 CsvReader.
+TEST_F(HiveV2ReaderTest, InitSupportsCsvFileFormat) {
+    HiveReader reader;
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN);
+    // CSV is expected to initialize cleanly and to map table columns by name.
+    ASSERT_TRUE(init_hive_reader(FileFormat::CSV, &params, &state, &profile, &reader).ok());
+    EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+// Scenario: Hive text files get a file-local schema synthesized from FE slots as well, so the
+// table-reader layer maps them by name while TextReader uses column_idxs for field ordinals.
+TEST_F(HiveV2ReaderTest, InitSupportsTextFileFormat) {
+    HiveReader reader;
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_TEXT);
+    // TEXT is expected to initialize cleanly and to map table columns by name.
+    ASSERT_TRUE(init_hive_reader(FileFormat::TEXT, &params, &state, &profile, &reader).ok());
+    EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+// Scenario: Hive JSON files get a file-local schema synthesized from FE slots as well, so the
+// table-reader layer maps them by name while JsonReader consumes the JSON attributes.
+TEST_F(HiveV2ReaderTest, InitSupportsJsonFileFormat) {
+    HiveReader reader;
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_JSON);
+    // JSON is expected to initialize cleanly and to map table columns by name.
+    ASSERT_TRUE(init_hive_reader(FileFormat::JSON, &params, &state, &profile, &reader).ok());
+    EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+TEST_F(HiveV2ReaderTest, MappingModeUsesInitializedFormat) {
+    // Session options: name mapping is switched off for Parquet and left on for ORC.
+    query_options.hive_orc_use_column_names = true;
+    query_options.hive_parquet_use_column_names = false;
+    state.set_query_options(query_options);
+    HiveReader reader;
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+
+    ASSERT_TRUE(init_hive_reader(FileFormat::PARQUET, &params, &state, &profile, &reader).ok());
+    EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_INDEX);
+    // A split in the initialized format is accepted and leaves the mapping mode unchanged.
+    SplitReadOptions same_format_split;
+    same_format_split.current_split_format = FileFormat::PARQUET;
+    same_format_split.current_range.__set_path("split.parquet");
+    ASSERT_TRUE(reader.prepare_split(same_format_split).ok());
+    EXPECT_EQ(reader.mapping_mode(), TableColumnMappingMode::BY_INDEX);
+    // An ORC split on a reader initialized for Parquet is expected to be refused.
+    SplitReadOptions other_format_split;
+    other_format_split.current_split_format = FileFormat::ORC;
+    other_format_split.current_range.__set_path("split.orc");
+    EXPECT_FALSE(reader.prepare_split(other_format_split).ok());
+}
+
+// Scenario: positional mapping is only for Hive Parquet/ORC sessions that disable name mapping.
+// CSV keeps the synthesized file-column names and leaves column_idxs for the CsvReader itself.
+TEST_F(HiveV2ReaderTest, CsvDoesNotConsumeColumnIdxsAsPositionalSchemaMapping) {
+    query_options.hive_parquet_use_column_names = false;
+    // `state` was built by the fixture before the line above ran. Hand it the modified options
+    // (as MappingModeUsesInitializedFormat does) so the session really disables name mapping.
+    state.set_query_options(query_options);
+    TFileScanRangeParams params;
+    params.__set_format_type(TFileFormatType::FORMAT_CSV_PLAIN);
+    params.__set_column_idxs({3});
+    ProjectedColumnBuildContext context {.scan_params = &params, .runtime_state = &state};
+    HiveReader reader;
+
+    TFileScanSlotInfo slot;
+    slot.__set_is_file_slot(true);
+    auto column = table_column("value", std::make_shared<DataTypeInt32>());
+
+    ASSERT_TRUE(reader.annotate_projected_column(slot, &context, &column).ok());
+    ASSERT_TRUE(column.has_identifier_name());
+    EXPECT_EQ(column.get_identifier_name(), "value");
+    EXPECT_EQ(context.next_file_column_idx, 0);
+}
+
+} // namespace
+} // namespace doris::format::hive
diff --git a/be/test/format_v2/table/hudi_reader_test.cpp b/be/test/format_v2/table/hudi_reader_test.cpp
new file mode 100644
index 00000000000000..125183cd7a60c4
--- /dev/null
+++ b/be/test/format_v2/table/hudi_reader_test.cpp
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/hudi_reader.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "core/field.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/ExternalTableSchema_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::format {
+namespace {
+
+schema::external::TFieldPtr external_schema_field(std::string name, int32_t id,
+                                                  std::vector<std::string> aliases = {}) {
+    auto node = std::make_shared<schema::external::TField>();
+    node->__set_id(id);
+    node->__set_name(std::move(name));
+    if (!aliases.empty()) { // leave name_mapping unset when the caller passes no aliases
+        node->__set_name_mapping(std::move(aliases));
+    }
+    schema::external::TFieldPtr holder;
+    holder.__isset.field_ptr = true;
+    holder.field_ptr = std::move(node);
+    return holder;
+}
+
+schema::external::TSchema external_schema(int64_t schema_id,
+                                          std::vector<schema::external::TFieldPtr> fields) {
+    schema::external::TStructField root;
+    root.__set_fields(std::move(fields));
+    schema::external::TSchema result; // one schema version: its id plus the top-level fields
+    result.__set_root_field(std::move(root));
+    result.__set_schema_id(schema_id);
+    return result;
+}
+
+ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition column; // file-side column: `id` is both the identifier and the local id
+    column.name = name;
+    column.type = type;
+    column.local_id = id;
+    column.identifier = Field::create_field<TYPE_INT>(id);
+    return column;
+}
+
+TTableFormatFileDesc hudi_table_format_desc(std::optional<int64_t> schema_id) {
+    THudiFileDesc hudi_desc;
+    if (schema_id) { // std::nullopt leaves the Hudi schema id unset on the split
+        hudi_desc.__set_schema_id(schema_id.value());
+    }
+    TTableFormatFileDesc format_desc;
+    format_desc.__set_table_format_type("hudi");
+    format_desc.__set_hudi_params(hudi_desc);
+    return format_desc;
+}
+
+// Scenario: the FileScannerV2 Hudi native reader annotates the physical file schema with the
+// split's schema id before TableColumnMapper runs, so schema-evolved Hudi files stay on field-id
+// mapping, renamed nested children included.
+TEST(HudiReaderTest, AnnotatesFileSchemaFromSplitHistorySchema) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    // Historical `profile` (id 20) is a struct with one child: id 21, old_age, alias age.
+    auto profile_field = external_schema_field("profile", 20);
+    schema::external::TStructField profile_children;
+    profile_children.__set_fields({external_schema_field("old_age", 21, {"age"})});
+    profile_field.field_ptr->nestedField.__set_struct_field(std::move(profile_children));
+    profile_field.field_ptr->__isset.nestedField = true;
+    // Schema 100 is the one the split refers to; schema 200 is the current table schema.
+    scan_params.__set_history_schema_info({
+            external_schema(100, {external_schema_field("old_name", 10, {"name"}), profile_field}),
+            external_schema(
+                    200, {external_schema_field("name", 10), external_schema_field("profile", 20)}),
+    });
+
+    hudi::HudiReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split;
+    split.current_range.__set_table_format_params(hudi_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID);
+    // File schema before annotation: identifiers are the local ids, names are the old ones.
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    auto profile_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"old_age"});
+    auto profile_column = make_file_column(1, "profile", profile_type);
+    profile_column.children = {make_file_column(0, "old_age", int_type)};
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(0, "old_name", string_type),
+            profile_column,
+    };
+    // Annotation is expected to stamp schema-100 field ids and aliases on every level.
+    ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok());
+    ASSERT_EQ(file_schema.size(), 2);
+    EXPECT_EQ(file_schema[0].get_identifier_field_id(), 10);
+    EXPECT_EQ(file_schema[0].name_mapping, std::vector<std::string>({"name"}));
+    EXPECT_EQ(file_schema[1].get_identifier_field_id(), 20);
+    ASSERT_EQ(file_schema[1].children.size(), 1);
+    EXPECT_EQ(file_schema[1].children[0].get_identifier_field_id(), 21);
+    EXPECT_EQ(file_schema[1].children[0].name_mapping, std::vector<std::string>({"age"}));
+}
+
+// Scenario: field-id mapping is only possible when the split's schema id resolves to one of the
+// historical schemas sent by FE. An unknown or missing split schema id must fall back to BY_NAME
+// and leave the physical file schema untouched.
+TEST(HudiReaderTest, FallsBackToByNameWhenSplitHistorySchemaIsMissing) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(200, {external_schema_field("name", 10)}),
+    });
+
+    hudi::HudiReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+    // The split names schema 100, which is absent from the history above.
+    SplitReadOptions split;
+    split.current_range.__set_table_format_params(hudi_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME);
+    // Annotation is expected to be a no-op: identifier still the local id, no aliases.
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(0, "old_name", std::make_shared<DataTypeString>()),
+    };
+    ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok());
+    EXPECT_EQ(file_schema[0].get_identifier_field_id(), 0);
+    EXPECT_TRUE(file_schema[0].name_mapping.empty());
+}
+
+// Scenario: the schema id of the previous split must be cleared before each new split. If it
+// leaked, a split that carries no schema id would inherit BY_FIELD_ID from its predecessor.
+TEST(HudiReaderTest, ResetsSplitSchemaIdBeforePreparingNextSplit) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(100, {external_schema_field("old_name", 10, {"name"})}),
+            external_schema(200, {external_schema_field("name", 10)}),
+    });
+
+    hudi::HudiReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+    // First split: names schema 100, which is part of the history, so field ids apply.
+    SplitReadOptions first_split;
+    first_split.current_range.__set_table_format_params(hudi_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(first_split).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID);
+    // Second split on the same reader: no schema id at all, so the mode is expected to drop
+    // back to BY_NAME instead of reusing schema 100.
+    SplitReadOptions second_split;
+    second_split.current_range.__set_table_format_params(hudi_table_format_desc(std::nullopt));
+    ASSERT_TRUE(reader.prepare_split(second_split).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/table/iceberg_reader_test.cpp b/be/test/format_v2/table/iceberg_reader_test.cpp
new file mode 100644
index 00000000000000..84fe09bc0c55b5
--- /dev/null
+++ b/be/test/format_v2/table/iceberg_reader_test.cpp
@@ -0,0 +1,1852 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/iceberg_reader.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <algorithm>
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <optional>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_array.h"
+#include "core/column/column_const.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exec/common/endian.h"
+#include "exprs/runtime_filter_expr.h"
+#include "exprs/vectorized_fn_call.h"
+#include "exprs/vexpr.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "format/format_common.h"
+#include "format/table/deletion_vector_reader.h"
+#include "format_v2/table_reader.h"
+#include "gen_cpp/Exprs_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/io_common.h"
+#include "roaring/roaring64map.hh"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+#include "storage/predicate/predicate_creator.h"
+#include "storage/segment/condition_cache.h"
+
+namespace doris::format {
+namespace {
+
+LocalColumnIndex field_projection(int32_t column_id) {
+    return {.index = column_id}; // only `index` is named; any other member keeps its default
+}
+
+std::vector<int32_t> projection_ids(const std::vector<LocalColumnIndex>& projections) {
+    // Size the result up front, then write each projection's `index` into the matching slot,
+    // so the output order follows the input order.
+    std::vector<int32_t> ids(projections.size());
+    std::transform(projections.begin(), projections.end(), ids.begin(),
+                   [](const LocalColumnIndex& projection) { return projection.index; });
+    return ids;
+}
+VExprSPtr table_int32_slot_ref(int slot_id, int column_id, const std::string& column_name) {
+    const auto slot_type = make_nullable(std::make_shared<DataTypeInt32>()); // nullable INT
+    return VSlotRef::create_shared(slot_id, column_id, slot_id, slot_type, column_name);
+}
+
+VExprSPtr table_int32_literal(int32_t value) {
+    const auto literal_type = std::make_shared<DataTypeInt32>(); // non-nullable INT
+    return VLiteral::create_shared(literal_type, Field::create_field<TYPE_INT>(value));
+}
+
+VExprSPtr table_int64_literal(int64_t value) {
+    const auto literal_type = std::make_shared<DataTypeInt64>(); // non-nullable BIGINT
+    return VLiteral::create_shared(literal_type, Field::create_field<TYPE_BIGINT>(value));
+}
+
+TExprNode table_function_node(const std::string& function_name, const DataTypePtr& return_type,
+                              const std::vector<DataTypePtr>& arg_types,
+                              TExprNodeType::type node_type,
+                              TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE,
+                              bool short_circuit_evaluation = false) {
+    std::vector<TTypeDesc> arg_descs;
+    arg_descs.reserve(arg_types.size());
+    for (const auto& arg_type : arg_types) {
+        arg_descs.push_back(arg_type->to_thrift());
+    }
+    TFunctionName name;
+    name.__set_function_name(function_name);
+    TFunction fn_desc;
+    fn_desc.__set_name(name);
+    fn_desc.__set_binary_type(TFunctionBinaryType::BUILTIN);
+    fn_desc.__set_has_var_args(false);
+    fn_desc.__set_ret_type(return_type->to_thrift());
+    fn_desc.__set_arg_types(arg_descs);
+    // The expression node repeats the return type and takes one child per argument type.
+    TExprNode node;
+    node.__set_fn(fn_desc);
+    node.__set_node_type(node_type);
+    node.__set_opcode(opcode);
+    node.__set_type(return_type->to_thrift());
+    node.__set_is_nullable(return_type->is_nullable());
+    node.__set_num_children(static_cast<int16_t>(arg_types.size()));
+    if (short_circuit_evaluation) {
+        node.__set_short_circuit_evaluation(true);
+    }
+    return node;
+}
+
+VExprSPtr table_function_expr(const std::string& function_name, const DataTypePtr& return_type,
+                              const std::vector<DataTypePtr>& arg_types,
+                              TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL,
+                              TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE) {
+    return VectorizedFnCall::create_shared(
+            table_function_node(function_name, return_type, arg_types, node_type, opcode));
+}
+
+VExprSPtr table_int32_greater_than_expr(int slot_id, int column_id, int32_t value) {
+    const auto operand_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_type = make_nullable(operand_type);
+    auto expr = table_function_expr("gt", make_nullable(std::make_shared<DataTypeUInt8>()),
+                                    {nullable_type, operand_type}, TExprNodeType::BINARY_PRED,
+                                    TExprOpcode::GT);
+    expr->add_child(table_int32_slot_ref(slot_id, column_id, "id")); // left: nullable slot `id`
+    expr->add_child(table_int32_literal(value));
+    return expr;
+}
+
+VExprSPtr table_nullable_int64_binary_predicate(const std::string& function_name,
+                                                TExprOpcode::type opcode, int slot_id,
+                                                int column_id, const std::string& column_name,
+                                                int64_t value) {
+    const auto operand_type = std::make_shared<DataTypeInt64>();
+    const auto nullable_type = make_nullable(operand_type);
+    auto expr = table_function_expr(function_name, make_nullable(std::make_shared<DataTypeUInt8>()),
+                                    {nullable_type, operand_type}, TExprNodeType::BINARY_PRED,
+                                    opcode);
+    expr->add_child(
+            VSlotRef::create_shared(slot_id, column_id, slot_id, nullable_type, column_name));
+    expr->add_child(table_int64_literal(value)); // right operand: non-nullable BIGINT literal
+    return expr;
+}
+
+class IcebergTableReaderDeleteFileTestHelper final
+        : public doris::format::iceberg::IcebergTableReader {
+public:
+    Status parse_deletion_vector_file(const TTableFormatFileDesc& t_desc, DeleteFileDesc* desc,
+                                      bool* has_delete_file) {
+        return _parse_deletion_vector_file(t_desc, desc, has_delete_file); // public passthrough
+    }
+};
+
+class IcebergTableReaderScanRequestTestHelper final
+        : public doris::format::iceberg::IcebergTableReader {
+public:
+    Status init_for_scan_request_test(std::vector<ColumnDefinition> projected_columns) {
+        _query_globals = std::make_unique<TQueryGlobals>();
+        _query_options = std::make_unique<TQueryOptions>();
+        _state = std::make_unique<RuntimeState>(*_query_options, *_query_globals);
+        RETURN_IF_ERROR(init({
+                .projected_columns = std::move(projected_columns),
+                .column_predicates = {},
+                .conjuncts = {},
+                .format = FileFormat::PARQUET,
+                .scan_params = nullptr,
+                .io_ctx = nullptr,
+                .runtime_state = _state.get(),
+                .scanner_profile = nullptr,
+        }));
+        // Prepare a Parquet split whose Iceberg descriptor carries first_row_id = 1000.
+        TIcebergFileDesc iceberg_desc;
+        iceberg_desc.__set_first_row_id(1000);
+        TTableFormatFileDesc format_desc;
+        format_desc.__set_iceberg_params(iceberg_desc);
+        SplitReadOptions split;
+        split.current_range.__set_path("scan-request-test.parquet");
+        split.current_range.__set_table_format_params(format_desc);
+        RETURN_IF_ERROR(prepare_split(split));
+        // Point `_delete_rows` at a list owned by this helper that holds the single value 1.
+        _delete_rows_storage = {1};
+        _delete_rows = &_delete_rows_storage;
+        return Status::OK();
+    }
+    // Lets tests call customize_file_scan_request directly.
+    Status customize_request(FileScanRequest* request) {
+        return customize_file_scan_request(request);
+    }
+
+private:
+    std::unique_ptr<TQueryOptions> _query_options;
+    std::unique_ptr<TQueryGlobals> _query_globals;
+    std::unique_ptr<RuntimeState> _state;
+    DeleteRows _delete_rows_storage;
+};
+
+class IcebergTableReaderMappingModeTestHelper final
+        : public doris::format::iceberg::IcebergTableReader {
+public:
+    TableColumnMappingMode mapping_mode_for_schema(std::vector<ColumnDefinition> file_schema) {
+        _data_reader.file_schema = std::move(file_schema); // set before mapping_mode() runs
+        return mapping_mode();
+    }
+};
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> finished; // a failed Finish() is reported through gtest only
+    EXPECT_TRUE(builder->Finish(&finished).ok());
+    return finished;
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder int32_builder;
+    for (const int32_t item : values) { // append failures are reported through gtest
+        EXPECT_TRUE(int32_builder.Append(item).ok());
+    }
+    return finish_array(&int32_builder);
+}
+
+std::shared_ptr<arrow::Array> build_int64_array(const std::vector<int64_t>& values) {
+    arrow::Int64Builder int64_builder;
+    for (const int64_t item : values) { // append failures are reported through gtest
+        EXPECT_TRUE(int64_builder.Append(item).ok());
+    }
+    return finish_array(&int64_builder);
+}
+
+// Builds an INT64 Arrow array in which every std::nullopt input becomes a null entry.
+// Append failures are reported through gtest; the array is finished either way.
+std::shared_ptr<arrow::Array> build_nullable_int64_array(
+        const std::vector<std::optional<int64_t>>& values) {
+    arrow::Int64Builder builder;
+    for (const auto& item : values) {
+        // Pick the append call first so that a single expectation covers both cases.
+        const auto status = item.has_value() ? builder.Append(*item) : builder.AppendNull();
+        EXPECT_TRUE(status.ok());
+    }
+    return finish_array(&builder);
+}
+
+std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder string_builder;
+    for (const std::string& item : values) { // append failures are reported through gtest
+        EXPECT_TRUE(string_builder.Append(item).ok());
+    }
+    return finish_array(&string_builder);
+}
+
+void write_iceberg_equality_delete_parquet_file(const std::string& file_path, int32_t field_id,
+                                                int32_t value) {
+    const auto id_metadata =
+            arrow::key_value_metadata({"PARQUET:field_id"}, {std::to_string(field_id)});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array({value})});
+    // A failed open only returns from this helper (ASSERT_TRUE); the calling test keeps running.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *opened;
+    // One required INT32 `id` column tagged with the requested field id, written uncompressed.
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      props.build()));
+}
+
+void write_iceberg_equality_delete_bigint_parquet_file(const std::string& file_path,
+                                                       int32_t field_id, int64_t value) {
+    const auto id_metadata =
+            arrow::key_value_metadata({"PARQUET:field_id"}, {std::to_string(field_id)});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int64(), false)->WithMetadata(id_metadata),
+    });
+    auto table = arrow::Table::Make(schema, {build_int64_array({value})});
+    // A failed open only returns from this helper (ASSERT_TRUE); the calling test keeps running.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *opened;
+    // One required INT64 `id` column tagged with the requested field id, written uncompressed.
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out, 1,
+                                                      props.build()));
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, const std::vector<int32_t>& ids,
+                                 const std::vector<int32_t>& scores,
+                                 const std::vector<std::string>& values,
+                                 int64_t row_group_size = -1) {
+    const auto id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"});
+    const auto score_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"1"});
+    const auto value_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"2"});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata),
+            arrow::field("score", arrow::int32(), false)->WithMetadata(score_metadata),
+            arrow::field("value", arrow::utf8(), false)->WithMetadata(value_metadata),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(scores),
+                                             build_string_array(values)});
+    // A failed open only returns from this helper (ASSERT_TRUE); the calling test keeps running.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *opened;
+    // A non-positive row_group_size means "use ids.size() as the row-group size".
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    const auto rows_per_group =
+            row_group_size > 0 ? row_group_size : static_cast<int64_t>(ids.size());
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      rows_per_group, props.build()));
+}
+
+void write_iceberg_row_lineage_parquet_file(
+        const std::string& file_path, const std::vector<int32_t>& ids,
+        const std::vector<std::optional<int64_t>>& row_ids,
+        const std::vector<std::optional<int64_t>>& last_updated_sequence_numbers = {}) {
+    ASSERT_EQ(ids.size(), row_ids.size());
+    if (!last_updated_sequence_numbers.empty()) {
+        ASSERT_EQ(ids.size(), last_updated_sequence_numbers.size());
+    }
+    const auto id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"});
+    const auto row_id_metadata = arrow::key_value_metadata({"PARQUET:field_id"}, {"2147483540"});
+    const auto last_updated_sequence_number_metadata =
+            arrow::key_value_metadata({"PARQUET:field_id"}, {"2147483539"});
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false)->WithMetadata(id_metadata),
+            arrow::field("_row_id", arrow::int64(), true)->WithMetadata(row_id_metadata),
+    });
+    std::vector<std::shared_ptr<arrow::Array>> arrays = {
+            build_int32_array(ids),
+            build_nullable_int64_array(row_ids),
+    };
+    if (!last_updated_sequence_numbers.empty()) {
+        schema =
+                schema->AddField(schema->num_fields(),
+                                 arrow::field("_last_updated_sequence_number", arrow::int64(), true)
+                                         ->WithMetadata(last_updated_sequence_number_metadata))
+                        .ValueOrDie();
+        arrays.push_back(build_nullable_int64_array(last_updated_sequence_numbers));
+    }
+    auto table = arrow::Table::Make(schema, arrays);
+    // A failed open only returns from this helper (ASSERT_TRUE); the calling test keeps running.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *opened;
+    // The third column only exists when sequence numbers were passed; ids.size() rows per group.
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      static_cast<int64_t>(ids.size()),
+                                                      props.build()));
+}
+
+void write_position_delete_parquet_file(const std::string& file_path,
+                                        const std::vector<std::string>& data_file_paths,
+                                        const std::vector<int64_t>& positions) {
+    auto schema = arrow::schema({
+            arrow::field("file_path", arrow::utf8(), false),
+            arrow::field("pos", arrow::int64(), false),
+    });
+    auto table = arrow::Table::Make(
+            schema, {build_string_array(data_file_paths), build_int64_array(positions)});
+    // A failed open only returns from this helper (ASSERT_TRUE); the calling test keeps running.
+    auto opened = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(opened.ok()) << opened.status();
+    std::shared_ptr<arrow::io::FileOutputStream> out = *opened;
+    // Two required columns (file_path, pos) written uncompressed, positions.size() rows per group.
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), out,
+                                                      static_cast<int64_t>(positions.size()),
+                                                      props.build()));
+}
+
+int64_t write_iceberg_deletion_vector_file(const std::string& file_path,
+                                           const std::vector<uint64_t>& deleted_positions) {
+    // Blob layout: [4B big-endian length][4B magic][serialized bitmap][4B trailer, written as 0].
+    roaring::Roaring64Map rows;
+    for (const auto position : deleted_positions) {
+        rows.add(position);
+    }
+    const size_t bitmap_size = rows.getSizeInBytes();
+    std::vector<char> blob(4 + 4 + bitmap_size + 4);
+    rows.write(blob.data() + 8);
+    // The length prefix counts the magic plus the bitmap, not itself or the trailer.
+    const uint32_t total_length = static_cast<uint32_t>(4 + bitmap_size);
+    BigEndian::Store32(blob.data(), total_length);
+    constexpr char DV_MAGIC[] = {'\xD1', '\xD3', '\x39', '\x64'};
+    memcpy(blob.data() + 4, DV_MAGIC, 4);
+    BigEndian::Store32(blob.data() + 8 + bitmap_size, 0);
+    std::ofstream output(file_path, std::ios::binary);
+    EXPECT_TRUE(output.is_open());
+    output.write(blob.data(), static_cast<std::streamsize>(blob.size()));
+    // Flush before checking the stream: a buffered write only reports I/O errors once flushed.
+    EXPECT_TRUE(output.flush().good());
+    return static_cast<int64_t>(blob.size()); // total number of bytes in the blob
+}
+
+Block build_table_block(const std::vector<ColumnDefinition>& columns) {
+    Block table_block; // one column is created from each definition's type, in input order
+    for (const auto& definition : columns) {
+        table_block.insert({definition.type->create_column(), definition.type, definition.name});
+    }
+    return table_block;
+}
+
+void expect_nullable_int64_column_values(const IColumn& column,
+                                         const std::vector<int64_t>& expected_values) {
+    const auto materialized = column.convert_to_full_column_if_const();
+    const auto& nullable = assert_cast<const ColumnNullable&>(*materialized);
+    const auto& nested = assert_cast<const ColumnInt64&>(nullable.get_nested_column());
+    const auto& actual = nested.get_data(); // every row must be non-null and match exactly
+    ASSERT_EQ(nullable.size(), expected_values.size());
+    for (size_t i = 0; i != expected_values.size(); ++i) {
+        EXPECT_EQ(nullable.get_null_map_data()[i], 0);
+        EXPECT_EQ(actual[i], expected_values[i]);
+    }
+}
+
+void expect_nullable_int64_column_optional_values(
+        const IColumn& column, const std::vector<std::optional<int64_t>>& expected_values) {
+    const auto materialized = column.convert_to_full_column_if_const();
+    const auto& nullable = assert_cast<const ColumnNullable&>(*materialized);
+    const auto& actual =
+            assert_cast<const ColumnInt64&>(nullable.get_nested_column()).get_data();
+    ASSERT_EQ(nullable.size(), expected_values.size());
+    for (size_t i = 0; i != expected_values.size(); ++i) {
+        if (!expected_values[i].has_value()) { // nullopt: row must be NULL, payload ignored
+            EXPECT_EQ(nullable.get_null_map_data()[i], 1);
+            continue;
+        }
+        EXPECT_EQ(nullable.get_null_map_data()[i], 0);
+        EXPECT_EQ(actual[i], *expected_values[i]);
+    }
+}
+
+const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) {
+    if (column.is_nullable()) { // unwrap only after checking that no row is flagged NULL
+        const auto& wrapper = assert_cast<const ColumnNullable&>(column);
+        for (const auto null_flag : wrapper.get_null_map_data()) {
+            EXPECT_EQ(null_flag, 0);
+        }
+        return wrapper.get_nested_column();
+    }
+    return column; // non-nullable input is handed back unchanged
+}
+
+const IColumn& expect_not_null_table_column(const Block& block, size_t position) { // no NULL rows
+    return expect_not_null_nullable_nested_column(*block.get_by_position(position).column);
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type);
+
+DataTypePtr make_iceberg_rowid_type() {
+    const Strings names {"file_path", "row_pos", "partition_spec_id", "partition_data_json"};
+    const DataTypes types {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt64>(),
+                           std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeString>()};
+    return make_nullable(std::make_shared<DataTypeStruct>(types, names)); // nullable struct
+}
+
+ColumnDefinition make_iceberg_row_lineage_row_id_column() {
+    const DataTypePtr row_id_type = make_nullable(std::make_shared<DataTypeInt64>());
+    return make_table_column(2147483540, "_row_id", row_id_type); // field id, name, type
+}
+
+ColumnDefinition make_iceberg_last_updated_sequence_number_column() { // nullable int64 column
+    const DataTypePtr sequence_type = make_nullable(std::make_shared<DataTypeInt64>());
+    return make_table_column(2147483539, "_last_updated_sequence_number", sequence_type);
+}
+
+void expect_iceberg_rowid_column_values(const IColumn& column, const std::string& file_path,
+                                        const std::vector<int64_t>& row_positions,
+                                        int32_t partition_spec_id,
+                                        const std::string& partition_data_json) {
+    const auto materialized = column.convert_to_full_column_if_const();
+    const auto& nullable = assert_cast<const ColumnNullable&>(*materialized);
+    const auto& row_struct = assert_cast<const ColumnStruct&>(nullable.get_nested_column());
+    // Struct children by index: 0 file path, 1 row position, 2 spec id, 3 partition JSON.
+    const auto& paths = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(row_struct.get_column(0)));
+    const auto& positions = assert_cast<const ColumnInt64&>(
+            expect_not_null_nullable_nested_column(row_struct.get_column(1)));
+    const auto& spec_ids = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(row_struct.get_column(2)));
+    const auto& partition_jsons = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(row_struct.get_column(3)));
+    // One struct row per expected position; path, spec id and JSON are the same for all rows.
+    ASSERT_EQ(nullable.size(), row_positions.size());
+    for (size_t i = 0; i != row_positions.size(); ++i) {
+        EXPECT_EQ(nullable.get_null_map_data()[i], 0);
+        EXPECT_EQ(paths.get_data_at(i).to_string(), file_path);
+        EXPECT_EQ(positions.get_element(i), row_positions[i]);
+        EXPECT_EQ(spec_ids.get_element(i), partition_spec_id);
+        EXPECT_EQ(partition_jsons.get_data_at(i).to_string(), partition_data_json);
+    }
+}
+
+void expect_int32_column_values(const IColumn& column,
+                                const std::vector<int32_t>& expected_values) {
+    const auto materialized = column.convert_to_full_column_if_const();
+    const auto& payload = expect_not_null_nullable_nested_column(*materialized); // rejects NULLs
+    const auto& actual = assert_cast<const ColumnInt32&>(payload).get_data();
+    ASSERT_EQ(actual.size(), expected_values.size());
+    for (size_t i = 0; i != expected_values.size(); ++i) {
+        EXPECT_EQ(actual[i], expected_values[i]);
+    }
+}
+
+SplitReadOptions build_split_options(const std::string& file_path) {
+    const auto size_on_disk = static_cast<int64_t>(std::filesystem::file_size(file_path));
+    SplitReadOptions split; // only the range's path and file size are populated here
+    split.current_range.__set_path(file_path);
+    split.current_range.__set_file_size(size_on_disk);
+    return split;
+}
+
+void set_table_level_row_count(SplitReadOptions* split_options, int64_t row_count) {
+    auto& range = split_options->current_range; // mark both nesting levels as explicitly set
+    range.table_format_params.table_level_row_count = row_count;
+    range.table_format_params.__isset.table_level_row_count = true;
+    range.__isset.table_format_params = true;
+}
+
+void set_iceberg_row_lineage_params(SplitReadOptions* split_options, int64_t first_row_id,
+                                    int64_t last_updated_sequence_number) {
+    TIcebergFileDesc iceberg_desc;
+    iceberg_desc.__set_first_row_id(first_row_id);
+    iceberg_desc.__set_last_updated_sequence_number(last_updated_sequence_number);
+    TTableFormatFileDesc format_desc; // replaces any table-format params already on the range
+    format_desc.__set_iceberg_params(iceberg_desc);
+    split_options->current_range.__set_table_format_params(format_desc);
+}
+
+void set_iceberg_rowid_params(SplitReadOptions* split_options,
+                              const std::string& original_file_path, int32_t partition_spec_id,
+                              const std::string& partition_data_json) {
+    TIcebergFileDesc iceberg_desc;
+    iceberg_desc.__set_original_file_path(original_file_path);
+    iceberg_desc.__set_partition_spec_id(partition_spec_id);
+    iceberg_desc.__set_partition_data_json(partition_data_json);
+    TTableFormatFileDesc format_desc; // replaces any table-format params already on the range
+    format_desc.__set_iceberg_params(iceberg_desc);
+    split_options->current_range.__set_table_format_params(format_desc);
+}
+
+TIcebergDeleteFileDesc make_iceberg_deletion_vector(const std::string& path, int64_t offset,
+                                                    int64_t size) {
+    TIcebergDeleteFileDesc dv_desc;
+    dv_desc.__set_path(path);
+    dv_desc.__set_content(3); // content code this suite uses for deletion vectors
+    dv_desc.__set_content_offset(offset);
+    dv_desc.__set_content_size_in_bytes(size);
+    return dv_desc;
+}
+
+TIcebergDeleteFileDesc make_iceberg_position_delete_file(const std::string& path) {
+    TIcebergDeleteFileDesc position_delete;
+    position_delete.__set_path(path);
+    position_delete.__set_file_format(TFileFormatType::FORMAT_PARQUET);
+    position_delete.__set_content(1); // content code this suite uses for position deletes
+    return position_delete;
+}
+
+TIcebergDeleteFileDesc make_iceberg_equality_delete_file(const std::string& path,
+                                                         const std::vector<int32_t>& field_ids) {
+    TIcebergDeleteFileDesc equality_delete;
+    equality_delete.__set_path(path);
+    equality_delete.__set_file_format(TFileFormatType::FORMAT_PARQUET);
+    equality_delete.__set_field_ids(field_ids);
+    equality_delete.__set_content(2); // content code this suite uses for equality deletes
+    return equality_delete;
+}
+
+TFileScanRangeParams make_local_parquet_scan_params() {
+    TFileScanRangeParams params; // only the file type and the format type are set here
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    params.__set_file_type(TFileType::FILE_LOCAL);
+    return params;
+}
+
+std::shared_ptr<io::IOContext> make_io_context(io::FileReaderStats* file_reader_stats,
+                                               io::FileCacheStatistics* file_cache_stats) {
+    auto context = std::make_shared<io::IOContext>(); // stores the raw stats pointers as given
+    context->file_cache_stats = file_cache_stats;
+    context->file_reader_stats = file_reader_stats;
+    return context;
+}
+
+TTableFormatFileDesc make_iceberg_table_format_desc(
+        const std::string& data_file_path,
+        const std::vector<TIcebergDeleteFileDesc>& delete_files) {
+    TIcebergFileDesc iceberg_desc;
+    iceberg_desc.__set_format_version(2);
+    iceberg_desc.__set_original_file_path(data_file_path);
+    iceberg_desc.__set_delete_files(delete_files);
+    TTableFormatFileDesc format_desc; // only the Iceberg params are filled in
+    format_desc.__set_iceberg_params(iceberg_desc);
+    return format_desc;
+}
+
+std::vector<int32_t> read_iceberg_ids(doris::format::iceberg::IcebergTableReader* reader,
+                                      const std::vector<ColumnDefinition>& projected_columns) {
+    // Drains the reader and collects column 0 (int32) of every non-empty block.
+    std::vector<int32_t> collected;
+    for (bool finished = false; !finished;) {
+        Block batch = build_table_block(projected_columns);
+        const auto read_status = reader->get_block(&batch, &finished);
+        if (!read_status.ok()) {
+            ADD_FAILURE() << read_status; // report and return what was read so far
+            return collected;
+        }
+        const size_t row_count = batch.rows();
+        if (row_count > 0) {
+            const auto& id_values =
+                    assert_cast<const ColumnInt32&>(expect_not_null_table_column(batch, 0));
+            for (size_t i = 0; i < row_count; ++i) {
+                collected.push_back(id_values.get_element(i));
+            }
+        }
+    }
+    return collected;
+}
+
+DataTypePtr make_table_test_type(const DataTypePtr& type, bool nullable_root = true) {
+    DORIS_CHECK(type != nullptr);
+    const auto bare_type = remove_nullable(type);
+    DataTypePtr rebuilt; // nested children always use the default nullable_root == true
+    if (const auto* as_struct = typeid_cast<const DataTypeStruct*>(bare_type.get())) {
+        DataTypes members;
+        members.reserve(as_struct->get_elements().size());
+        for (const auto& member : as_struct->get_elements()) {
+            members.push_back(make_table_test_type(member));
+        }
+        rebuilt = std::make_shared<DataTypeStruct>(members, as_struct->get_element_names());
+    } else if (const auto* as_array = typeid_cast<const DataTypeArray*>(bare_type.get())) {
+        rebuilt = std::make_shared<DataTypeArray>(
+                make_table_test_type(as_array->get_nested_type()));
+    } else if (const auto* as_map = typeid_cast<const DataTypeMap*>(bare_type.get())) {
+        rebuilt = std::make_shared<DataTypeMap>(make_table_test_type(as_map->get_key_type()),
+                                                make_table_test_type(as_map->get_value_type()));
+    } else {
+        rebuilt = bare_type;
+    }
+    return nullable_root ? make_nullable(rebuilt) : rebuilt;
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition definition;
+    definition.name = name;
+    // External table scan descriptors expose nullable columns even when the Parquet field is
+    // required; normalizing here keeps the test schema aligned with the real scan contract.
+    definition.type = make_table_test_type(type);
+    // A negative id leaves the identifier at its default (no field id attached).
+    if (id >= 0) {
+        definition.identifier = Field::create_field<TYPE_INT>(id);
+    }
+    return definition;
+}
+
+ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition definition; // unlike make_table_column, the id is always applied
+    definition.name = name;
+    definition.type = make_table_test_type(type);
+    definition.local_id = id;
+    definition.identifier = Field::create_field<TYPE_INT>(id);
+    return definition;
+}
+
+void set_name_identifiers(std::vector<ColumnDefinition>* columns);
+
+void set_name_identifier(ColumnDefinition* column) {
+    DORIS_CHECK(column != nullptr);
+    column->identifier = Field::create_field<TYPE_STRING>(column->name); // identify by name
+    set_name_identifiers(&column->children); // then do the same for all nested columns
+}
+
+void set_name_identifiers(std::vector<ColumnDefinition>* columns) {
+    DORIS_CHECK(columns != nullptr);
+    for (ColumnDefinition& entry : *columns) { // each entry also has its children rewritten
+        set_name_identifier(&entry);
+    }
+}
+
+void add_column_predicate(TableColumnPredicates* column_predicates, GlobalIndex global_index,
+                          std::shared_ptr<ColumnPredicate> predicate) {
+    // Appends the predicate to the list stored under global_index (looked up via operator[]).
+    (*column_predicates)[global_index].push_back(std::move(predicate));
+}
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto context = VExprContext::create_shared(expr);
+    const auto prepare_status = context->prepare(state, RowDescriptor());
+    EXPECT_TRUE(prepare_status.ok()) << prepare_status;
+    const auto open_status = context->open(state); // still attempted if prepare failed
+    EXPECT_TRUE(open_status.ok()) << open_status;
+    return context;
+}
+
+void apply_final_conjuncts(Block* block, const VExprContextSPtrs& conjuncts) {
+    const auto filter_status = VExprContext::filter_block(conjuncts, block, block->columns());
+    ASSERT_TRUE(filter_status.ok()) << filter_status; // *block itself is what gets filtered
+}
+
+TEST(IcebergV2ReaderTest, IcebergVirtualColumnsUseRowLineageMetadata) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_virtual_columns_test";
+    std::filesystem::remove_all(test_dir); // start from an empty directory
+    std::filesystem::create_directories(test_dir);
+    // Checks the _row_id / _last_updated_sequence_number values returned for a filtered split.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    // Projection order fixes the block positions: 0 = _row_id, 1 = sequence number, 2 = id.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_iceberg_last_updated_sequence_number_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // The conjunct helper is defined elsewhere; the assertions below expect ids 2 and 3 to pass.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(2, 2, 1))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 1000, 77); // first_row_id, sequence number
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 2));
+    // _row_id is expected as 1001 / 1002 and the sequence number as 77 for both rows.
+    ASSERT_EQ(block.rows(), 2);
+    EXPECT_EQ(id_column.get_element(0), 2);
+    EXPECT_EQ(id_column.get_element(1), 3);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001, 1002});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {77, 77});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergRowLineageUsesPhysicalRowIdAndFillsNulls) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_physical_row_id_fill_test";
+    std::filesystem::remove_all(test_dir); // start from an empty directory
+    std::filesystem::create_directories(test_dir);
+    // The written lineage columns carry a value in rows 0 and 2 and std::nullopt in row 1.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002},
+                                           {80, std::nullopt, 82});
+    // Projection order fixes the block positions: 0 = _row_id, 1 = sequence number, 2 = id.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(
+            2147483540, "_row_id", make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(
+            make_table_column(2147483539, "_last_updated_sequence_number",
+                              make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 1000, 77); // first_row_id, sequence number
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // Non-NULL stored values are expected unchanged; the NULL row is expected as 1001 / 77.
+    ASSERT_EQ(block.rows(), 3);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {7000, 1001, 7002});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {80, 77, 82});
+    expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergPhysicalRowIdKeepsNullsWithoutFirstRowId) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_physical_row_id_no_first_test";
+    std::filesystem::remove_all(test_dir); // start from an empty directory
+    std::filesystem::create_directories(test_dir);
+    // The written lineage columns carry a value in rows 0 and 2 and std::nullopt in row 1.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002},
+                                           {80, std::nullopt, 82});
+    // Projection order fixes the block positions: 0 = _row_id, 1 = sequence number, 2 = id.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(
+            2147483540, "_row_id", make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(
+            make_table_column(2147483539, "_last_updated_sequence_number",
+                              make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // build_split_options() only sets path and file size, so no first_row_id is supplied.
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // The stored NULLs are expected to come back as NULL rather than being filled in.
+    ASSERT_EQ(block.rows(), 3);
+    expect_nullable_int64_column_optional_values(
+            *block.get_by_position(0).column,
+            std::vector<std::optional<int64_t>> {7000, std::nullopt, 7002});
+    expect_nullable_int64_column_optional_values(
+            *block.get_by_position(1).column,
+            std::vector<std::optional<int64_t>> {80, std::nullopt, 82});
+    expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergMissingRowIdStaysNullWithoutFirstRowId) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_missing_row_id_no_first_test";
+    std::filesystem::remove_all(test_dir); // start from an empty directory
+    std::filesystem::create_directories(test_dir);
+    // Both lineage columns are projected, but the split supplies no Iceberg lineage params.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    // Projection order fixes the block positions: 0 = _row_id, 1 = sequence number, 2 = id.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_iceberg_last_updated_sequence_number_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // build_split_options() only sets path and file size, so no first_row_id is supplied.
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // Both lineage columns are expected to be NULL in every row; id is expected as written.
+    ASSERT_EQ(block.rows(), 3);
+    expect_nullable_int64_column_optional_values(
+            *block.get_by_position(0).column,
+            std::vector<std::optional<int64_t>> {std::nullopt, std::nullopt, std::nullopt});
+    expect_nullable_int64_column_optional_values(
+            *block.get_by_position(1).column,
+            std::vector<std::optional<int64_t>> {std::nullopt, std::nullopt, std::nullopt});
+    expect_int32_column_values(*block.get_by_position(2).column, {1, 2, 3});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergRowIdPredicateFiltersAfterRowLineageMaterialization) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_row_id_finalize_filter_test";
+    std::filesystem::remove_all(test_dir); // start from an empty directory
+    std::filesystem::create_directories(test_dir);
+    // The written lineage columns carry a value in rows 0 and 2 and std::nullopt in row 1.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002},
+                                           {80, std::nullopt, 82});
+    // Projection order fixes the block positions: 0 = _row_id, 1 = sequence number, 2 = id.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(
+            2147483540, "_row_id", make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(
+            make_table_column(2147483539, "_last_updated_sequence_number",
+                              make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Presumably `_row_id = 1001` (helper defined elsewhere); the same list is reused below.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    VExprContextSPtrs conjuncts = {prepared_conjunct(
+            &state,
+            table_nullable_int64_binary_predicate("eq", TExprOpcode::EQ, 0, 0, "_row_id", 1001))};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = conjuncts,
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 1000, 77); // first_row_id, sequence number
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3); // the reader is expected to hand back all three rows
+    // Applying the conjuncts to the returned block is expected to keep only the filled-in row.
+    apply_final_conjuncts(&block, conjuncts);
+    ASSERT_EQ(block.rows(), 1);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {77});
+    expect_int32_column_values(*block.get_by_position(2).column, {2});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergLastUpdatedSequencePredicateFiltersAfterMaterialization) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_sequence_finalize_filter_test";
+    std::filesystem::remove_all(test_dir); // start from an empty directory
+    std::filesystem::create_directories(test_dir);
+    // The written lineage columns carry a value in rows 0 and 2 and std::nullopt in row 1.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_iceberg_row_lineage_parquet_file(file_path, {1, 2, 3}, {7000, std::nullopt, 7002},
+                                           {80, std::nullopt, 82});
+    // Projection order fixes the block positions: 0 = _row_id, 1 = sequence number, 2 = id.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(
+            2147483540, "_row_id", make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(
+            make_table_column(2147483539, "_last_updated_sequence_number",
+                              make_nullable(std::make_shared<DataTypeInt64>())));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Presumably `_last_updated_sequence_number = 77` (helper defined elsewhere); reused below.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    VExprContextSPtrs conjuncts = {prepared_conjunct(
+            &state, table_nullable_int64_binary_predicate("eq", TExprOpcode::EQ, 1, 1,
+                                                          "_last_updated_sequence_number", 77))};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = conjuncts,
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 1000, 77); // first_row_id, sequence number
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3); // the reader is expected to hand back all three rows
+    // Applying the conjuncts to the returned block is expected to keep only the filled-in row.
+    apply_final_conjuncts(&block, conjuncts);
+    ASSERT_EQ(block.rows(), 1);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {1001});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {77});
+    expect_int32_column_values(*block.get_by_position(2).column, {2});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergRowidVirtualColumnUsesDataFilePosition) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_rowid_virtual_column_test";
+    std::filesystem::remove_all(test_dir); // start from an empty directory
+    std::filesystem::create_directories(test_dir);
+    // Projects the Iceberg row-id struct column (block position 0) next to id (position 1).
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    // Passing -1 to make_table_column leaves the row-id column's identifier unset.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(
+            make_table_column(-1, BeConsts::ICEBERG_ROWID_COL, make_iceberg_rowid_type()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // The conjunct helper is defined elsewhere; the assertions below expect ids 2 and 3 to pass.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(1, 1, 1))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    const auto original_file_path = "s3://bucket/table/data/original.parquet";
+    const auto partition_data_json = R"({"part":"p1"})";
+    set_iceberg_rowid_params(&split_options, original_file_path, 17, partition_data_json);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // Expected: ids 2 and 3 with row positions 1 and 2, plus the split's path, spec id, JSON.
+    ASSERT_EQ(block.rows(), 2);
+    expect_iceberg_rowid_column_values(*block.get_by_position(0).column, original_file_path, {1, 2},
+                                       17, partition_data_json);
+    expect_int32_column_values(*block.get_by_position(1).column, {2, 3});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Row-lineage virtual columns must stay aligned with the rows kept by a conjunct on id.
+TEST(IcebergV2ReaderTest, IcebergVirtualColumnsKeepRowLineageAfterConjunctFiltering) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_virtual_columns_conjunct_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    // Block positions: 0 = row-lineage row id, 1 = last-updated sequence number, 2 = id.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_iceberg_last_updated_sequence_number_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(2, 2, 1))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // 3000 and 88 come back below as row ids 3001/3002 and sequence number 88.
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 3000, 88);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 2));
+
+    ASSERT_EQ(block.rows(), 2);
+    EXPECT_EQ(id_column.get_element(0), 2);
+    EXPECT_EQ(id_column.get_element(1), 3);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {3001, 3002});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {88, 88});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Row-lineage row ids must stay file-local when a column predicate prunes whole row groups.
+TEST(IcebergV2ReaderTest, IcebergVirtualColumnsKeepRowLineageAfterRowGroupPredicatePruning) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_iceberg_virtual_columns_row_group_predicate_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    // ColumnPredicate is used for row-group/statistics pruning. Keep one row per row group so
+    // id > 2 prunes the first two row groups and leaves only the third file-local row.
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_iceberg_last_updated_sequence_number_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // NOTE(review): GlobalIndex(2) appears to address `id`, the third projected column.
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(2),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(2), false));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // 4000 and 99 come back below as row id 4002 and sequence number 99.
+    auto split_options = build_split_options(file_path);
+    set_iceberg_row_lineage_params(&split_options, 4000, 99);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 2));
+
+    ASSERT_EQ(block.rows(), 1);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {4002});
+    expect_nullable_int64_column_values(*block.get_by_position(1).column, {99});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// A single deletion-vector entry must be parsed into a DeleteFileDesc with matching fields.
+TEST(IcebergV2ReaderTest, IcebergDeletionVectorUsesTableReaderDeleteFileInterface) {
+    TTableFormatFileDesc table_format_desc;
+    TIcebergFileDesc iceberg_desc;
+    iceberg_desc.__set_format_version(2);
+    iceberg_desc.__set_delete_files({make_iceberg_deletion_vector("dv.bin", 8, 128)});
+    table_format_desc.__set_iceberg_params(iceberg_desc);
+    // Call parse_deletion_vector_file() directly through the test helper.
+    IcebergTableReaderDeleteFileTestHelper reader;
+    DeleteFileDesc desc;
+    bool has_delete_file = false;
+    ASSERT_TRUE(reader.parse_deletion_vector_file(table_format_desc, &desc, &has_delete_file).ok());
+    // "dv.bin", 8 and 128 from above must surface as path, start_offset and size.
+    EXPECT_TRUE(has_delete_file);
+    EXPECT_EQ(desc.path, "dv.bin");
+    EXPECT_EQ(desc.start_offset, 8);
+    EXPECT_EQ(desc.size, 128);
+    EXPECT_EQ(desc.file_size, -1);
+    EXPECT_EQ(desc.format, DeleteFileDesc::Format::ICEBERG);
+}
+// Parsing must fail when the Iceberg params list more than one deletion vector.
+TEST(IcebergV2ReaderTest, IcebergDeletionVectorRejectsMultipleDeleteFiles) {
+    TTableFormatFileDesc table_format_desc;
+    TIcebergFileDesc iceberg_desc;
+    iceberg_desc.__set_format_version(2);
+    iceberg_desc.__set_delete_files({make_iceberg_deletion_vector("dv-a.bin", 8, 128),
+                                     make_iceberg_deletion_vector("dv-b.bin", 16, 256)});
+    table_format_desc.__set_iceberg_params(iceberg_desc);
+
+    IcebergTableReaderDeleteFileTestHelper reader;
+    DeleteFileDesc desc;
+    bool has_delete_file = false;
+    auto status = reader.parse_deletion_vector_file(table_format_desc, &desc, &has_delete_file);
+    // Only the failure is asserted; desc and has_delete_file are not inspected here.
+    EXPECT_FALSE(status.ok());
+}
+// Rows named by a deletion-vector file must be dropped from the ids the reader returns.
+TEST(IcebergV2ReaderTest, IcebergTableReaderAppliesDeletionVectorFile) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_deletion_vector_file_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto dv_path = (test_dir / "delete-vector.bin").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"});
+    const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0, 4});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    // COUNT push-down is requested in init(), yet real ids are still expected below.
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // NOTE(review): {0, 4} look like row positions; ids 1 and 5 are the ones missing below.
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({2, 3, 4}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// With a deletion vector present, a COUNT push-down request must still return real rows.
+TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithDeletes) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_delete_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto dv_path = (test_dir / "delete-vector.bin").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    // The deletion vector is attached to the split through the Iceberg table-format params.
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Ids 2 and 3 are checked value by value, not just the row count.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 2);
+    EXPECT_EQ(id_column.get_element(1), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+// Covers TopN lazy materialization on Iceberg schema-evolution tables. The first-phase scan adds a
+// synthesized GLOBAL_ROWID column to the file schema. That virtual column must not make Iceberg
+// fall back from field-id mapping to name mapping, otherwise renamed columns are read as defaults
+// from old files.
+TEST(IcebergV2ReaderTest, IcebergMappingModeIgnoresGlobalRowIdVirtualColumn) {
+    IcebergTableReaderMappingModeTestHelper reader;
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(1, "id", std::make_shared<DataTypeInt32>()),
+            make_file_column(2, "name", std::make_shared<DataTypeString>()),
+            global_rowid_column_definition(),
+    };
+    // Field-id mapping is expected even though the schema includes the global rowid column.
+    EXPECT_EQ(reader.mapping_mode_for_schema(std::move(file_schema)),
+              TableColumnMappingMode::BY_FIELD_ID);
+}
+
+// Covers the fallback side of the previous case. Only synthesized columns are ignored; a real data
+// column without an Iceberg field id still disables field-id mapping.
+TEST(IcebergV2ReaderTest, IcebergMappingModeRequiresFieldIdsForDataColumns) {
+    IcebergTableReaderMappingModeTestHelper reader;
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(1, "id", std::make_shared<DataTypeInt32>()),
+            make_file_column(2, "name", std::make_shared<DataTypeString>()),
+            global_rowid_column_definition(),
+    };
+    file_schema[1].identifier = Field {}; // "name" now has a default-constructed identifier
+
+    EXPECT_EQ(reader.mapping_mode_for_schema(std::move(file_schema)),
+              TableColumnMappingMode::BY_NAME);
+}
+// A position delete file must keep a COUNT push-down request from skipping real rows.
+TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithPositionDelete) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_position_delete_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {file_path}, {1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Ids 1 and 3 remain; NOTE(review): {1} above presumably names position 1 (id 2).
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// A table-level row count assigned to the split must be served as-is for a COUNT push-down.
+TEST(IcebergV2ReaderTest, IcebergTableLevelCountUsesAssignedRowCountWithPositionDelete) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_table_level_count_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {file_path}, {1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // NOTE(review): batch_size 10 presumably lets the 5 assigned rows fit in one block.
+    TQueryOptions query_options;
+    query_options.__set_batch_size(10);
+    RuntimeState state {query_options, TQueryGlobals()};
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)}));
+    set_table_level_row_count(&split_options, 5);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // 5 rows are expected although 3 values were written and a position delete is attached.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.rows(), 5);
+    // The next call must report end of stream with an empty block.
+    block = build_table_block(projected_columns);
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.rows(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Position deletes must still apply when the Iceberg params set only version and delete files.
+TEST(IcebergV2ReaderTest, IcebergPositionDeleteFallsBackToSplitPath) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_position_delete_path_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {file_path}, {1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+    // The params are built by hand here: only format_version and delete_files are set.
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    TTableFormatFileDesc table_format_params;
+    TIcebergFileDesc iceberg_params;
+    iceberg_params.__set_format_version(2);
+    iceberg_params.__set_delete_files({make_iceberg_position_delete_file(delete_file_path)});
+    table_format_params.__set_iceberg_params(iceberg_params);
+    split_options.current_range.__set_table_format_params(table_format_params);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({1, 3}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// An equality delete file must keep a COUNT push-down request from skipping real rows.
+TEST(IcebergV2ReaderTest, IcebergTableReaderDoesNotPushDownAggregateWithEqualityDelete) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_aggregate_equality_delete_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "equality-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_iceberg_equality_delete_parquet_file(delete_file_path, 0, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_equality_delete_file(delete_file_path, {0})}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Ids 1 and 3 remain; NOTE(review): the delete file presumably targets id == 2.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Equality deletes must match an INT32 data column against a delete file written as bigint.
+TEST(IcebergV2ReaderTest, IcebergEqualityDeleteCastsDataColumnToDeleteKeyType) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_equality_delete_cast_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "equality-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_iceberg_equality_delete_bigint_parquet_file(delete_file_path, 0, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_equality_delete_file(delete_file_path, {0})}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Id 2 is absent from the result, so the delete written by the bigint helper was matched.
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({1, 3}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Position delete entries written for another data file must not remove rows from this split.
+TEST(IcebergV2ReaderTest, IcebergPositionDeleteOnlyMatchesOriginalDataFilePath) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_iceberg_position_delete_path_match_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto other_file_path = (test_dir / "other.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {other_file_path, file_path}, {0, 1});
+    // other.parquet is only named in the delete file; this test never writes it to disk.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Only id 2 is dropped. NOTE(review): assumes the path and position lists pair by index.
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({1, 3}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Row-lineage row ids must keep their file-local offsets after a position delete drops a row.
+TEST(IcebergV2ReaderTest, IcebergRowLineageRemainsFileLocalAfterDeleteFiltering) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_row_lineage_delete_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    write_position_delete_parquet_file(delete_file_path, {file_path}, {1});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+    // first_row_id is set on the params after the helper has built them.
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    TTableFormatFileDesc table_format_params = make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)});
+    table_format_params.iceberg_params.__set_first_row_id(1000);
+    split_options.current_range.__set_table_format_params(table_format_params);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Row ids 1000 and 1002 are expected: the id of the deleted middle row is skipped.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    expect_nullable_int64_column_values(*block.get_by_position(0).column, {1000, 1002});
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableReaderAppliesPositionDeleteFile) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_position_delete_file_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Scenario: five rows (ids 1..5); the delete file lists row positions 1 and 3.
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto delete_file_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"});
+    write_position_delete_parquet_file(delete_file_path, {file_path, file_path}, {1, 3});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+    // Attach the position-delete file to the split through the Iceberg table format params.
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_position_delete_file(delete_file_path)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Ids 2 and 4 sit at the deleted positions, so only 1, 3 and 5 are expected back.
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({1, 3, 5}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, IcebergTableReaderMergesDeletionVectorAndPositionDeleteFiles) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_iceberg_delete_files_merge_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Scenario: a deletion vector marks position 0; the delete file lists position 3 twice.
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto dv_path = (test_dir / "delete-vector.bin").string();
+    const auto position_delete_path = (test_dir / "position-delete.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"});
+    const auto dv_size = write_iceberg_deletion_vector_file(dv_path, {0});
+    write_position_delete_parquet_file(position_delete_path, {file_path, file_path}, {3, 3});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    doris::format::iceberg::IcebergTableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+    // Both delete sources are attached to the same split.
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(make_iceberg_table_format_desc(
+            file_path, {make_iceberg_deletion_vector(dv_path, 0, dv_size),
+                        make_iceberg_position_delete_file(position_delete_path)}));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Positions 0 and 3 (ids 1 and 4) are dropped; the duplicate entry has no extra effect.
+    EXPECT_EQ(read_iceberg_ids(&reader, projected_columns), std::vector<int32_t>({2, 3, 5}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(IcebergV2ReaderTest, RowPositionDeletePredicateColumnIsNotRepeatedAsOutputColumn) {
+    const auto row_position_column_id = ROW_POSITION_COLUMN_ID;
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_iceberg_row_lineage_row_id_column());
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Projection: the row-lineage row-id column plus the ordinary "id" column.
+    IcebergTableReaderScanRequestTestHelper reader;
+    ASSERT_TRUE(reader.init_for_scan_request_test(projected_columns).ok());
+    // The incoming request only reads column 0 as a non-predicate column at local index 0.
+    FileScanRequest request;
+    request.non_predicate_columns.push_back(field_projection(0));
+    request.local_positions.emplace(LocalColumnId(0), LocalIndex(0));
+
+    ASSERT_TRUE(reader.customize_request(&request).ok());
+    // The row-position column must show up once: as a predicate column at local index 1.
+    EXPECT_EQ(projection_ids(request.predicate_columns),
+              std::vector<int32_t>({row_position_column_id}));
+    EXPECT_EQ(projection_ids(request.non_predicate_columns), std::vector<int32_t>({0}));
+    ASSERT_TRUE(request.local_positions.contains(LocalColumnId(row_position_column_id)));
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(row_position_column_id)).value(), 1);
+    ASSERT_TRUE(request.conjuncts.empty());
+    ASSERT_EQ(request.delete_conjuncts.size(), 1);
+    EXPECT_NE(request.delete_conjuncts[0], nullptr);
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/table/paimon_reader_test.cpp b/be/test/format_v2/table/paimon_reader_test.cpp
new file mode 100644
index 00000000000000..fce0244c1738bd
--- /dev/null
+++ b/be/test/format_v2/table/paimon_reader_test.cpp
@@ -0,0 +1,539 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/paimon_reader.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <cstring>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/field.h"
+#include "exec/common/endian.h"
+#include "format/format_common.h"
+#include "format_v2/column_data.h"
+#include "gen_cpp/ExternalTableSchema_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/io_common.h"
+#include "roaring/roaring.hh"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+
+namespace doris::format {
+namespace {
+
+DataTypePtr table_type(const DataTypePtr& type) {
+    return !type->is_nullable() ? make_nullable(type) : type; // Table columns are nullable.
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition definition; // Table-side column: local_id untouched, type made nullable.
+    definition.name = name;
+    definition.type = table_type(type);
+    definition.identifier = Field::create_field<TYPE_INT>(id);
+    return definition;
+}
+
+ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition definition; // File-side column: id doubles as local_id, type kept as-is.
+    definition.type = type;
+    definition.name = name;
+    definition.local_id = id;
+    definition.identifier = Field::create_field<TYPE_INT>(id);
+    return definition;
+}
+
+schema::external::TFieldPtr external_schema_field(std::string name, int32_t id,
+                                                  std::vector<std::string> aliases = {}) {
+    auto schema_field = std::make_shared<schema::external::TField>();
+    schema_field->__set_id(id);
+    schema_field->__set_name(std::move(name));
+    if (!aliases.empty()) { // No aliases: the name_mapping setter is never called.
+        schema_field->__set_name_mapping(std::move(aliases));
+    }
+    schema::external::TFieldPtr wrapper;
+    wrapper.__isset.field_ptr = true;
+    wrapper.field_ptr = std::move(schema_field);
+    return wrapper;
+}
+
+schema::external::TFieldPtr external_array_field(std::string name, int32_t id,
+                                                 schema::external::TFieldPtr item_field,
+                                                 std::vector<std::string> aliases = {}) {
+    schema::external::TArrayField nested_array;
+    nested_array.__set_item_field(std::move(item_field));
+    auto result = external_schema_field(std::move(name), id, std::move(aliases));
+    result.field_ptr->__isset.nestedField = true;
+    result.field_ptr->nestedField.__set_array_field(std::move(nested_array));
+    return result;
+}
+
+schema::external::TFieldPtr external_map_field(std::string name, int32_t id,
+                                               schema::external::TFieldPtr key_field,
+                                               schema::external::TFieldPtr value_field,
+                                               std::vector<std::string> aliases = {}) {
+    schema::external::TMapField nested_map;
+    nested_map.__set_value_field(std::move(value_field));
+    nested_map.__set_key_field(std::move(key_field));
+    auto result = external_schema_field(std::move(name), id, std::move(aliases));
+    result.field_ptr->__isset.nestedField = true;
+    result.field_ptr->nestedField.__set_map_field(std::move(nested_map));
+    return result;
+}
+
+schema::external::TSchema external_schema(int64_t schema_id,
+                                          std::vector<schema::external::TFieldPtr> fields) {
+    schema::external::TSchema result;
+    result.__set_schema_id(schema_id);
+    schema::external::TStructField root;
+    root.__set_fields(std::move(fields));
+    result.__set_root_field(std::move(root));
+    return result;
+}
+
+Block build_table_block(const std::vector<ColumnDefinition>& columns) {
+    Block table_block; // Receives one freshly created column per definition, in input order.
+    for (const auto& col_def : columns) {
+        table_block.insert({col_def.type->create_column(), col_def.type, col_def.name});
+    }
+    return table_block;
+}
+
+const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) {
+    if (column.is_nullable()) {
+        const auto& wrapped = assert_cast<const ColumnNullable&>(column);
+        for (const auto is_null : wrapped.get_null_map_data()) {
+            EXPECT_EQ(is_null, 0);
+        }
+        return wrapped.get_nested_column();
+    }
+    return column; // Non-nullable input is handed back unchanged.
+}
+
+const IColumn& expect_not_null_table_column(const Block& block, size_t position) {
+    return expect_not_null_nullable_nested_column(*block.get_by_position(position).column);
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder int_builder;
+    std::shared_ptr<arrow::Array> finished; // Filled in by Finish() below.
+    for (size_t idx = 0; idx < values.size(); ++idx) {
+        EXPECT_TRUE(int_builder.Append(values[idx]).ok());
+    }
+    EXPECT_TRUE(int_builder.Finish(&finished).ok());
+    return finished;
+}
+
+std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder string_builder;
+    std::shared_ptr<arrow::Array> finished; // Filled in by Finish() below.
+    for (size_t idx = 0; idx < values.size(); ++idx) {
+        EXPECT_TRUE(string_builder.Append(values[idx]).ok());
+    }
+    EXPECT_TRUE(string_builder.Finish(&finished).ok());
+    return finished;
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, const std::vector<int32_t>& ids,
+                                 const std::vector<int32_t>& scores,
+                                 const std::vector<std::string>& values) {
+    ASSERT_EQ(ids.size(), scores.size());
+    ASSERT_EQ(ids.size(), values.size());
+    // Writer settings: Parquet 2.6, V2 data pages, no compression.
+    ::parquet::WriterProperties::Builder props;
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    auto schema = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("score", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto table = arrow::Table::Make(schema, {build_int32_array(ids), build_int32_array(scores),
+                                             build_string_array(values)});
+    auto open_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(open_result.ok()) << open_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *open_result;
+    // The chunk size passed to WriteTable equals the row count of the table.
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*table, arrow::default_memory_pool(), sink,
+                                                      static_cast<int64_t>(ids.size()),
+                                                      props.build()));
+}
+
+int64_t write_paimon_deletion_vector_file(const std::string& file_path,
+                                          const std::vector<uint32_t>& deleted_positions) {
+    roaring::Roaring rows;
+    for (const auto position : deleted_positions) {
+        rows.add(position);
+    }
+    // Blob layout: [4-byte big-endian length][4-byte magic][serialized roaring bitmap].
+    const size_t bitmap_size = rows.getSizeInBytes();
+    const uint32_t total_length = static_cast<uint32_t>(4 + bitmap_size);
+    std::vector<char> blob(4 + total_length);
+    BigEndian::Store32(blob.data(), total_length);
+    constexpr char PAIMON_BITMAP_MAGIC[] = {'\x5E', '\x43', '\xF2', '\xD0'};
+    memcpy(blob.data() + 4, PAIMON_BITMAP_MAGIC, 4);
+    rows.write(blob.data() + 8);
+    // Flush before checking the stream so that buffered write failures are reported here.
+    std::ofstream output(file_path, std::ios::binary);
+    EXPECT_TRUE(output.is_open());
+    output.write(blob.data(), static_cast<std::streamsize>(blob.size())).flush();
+    EXPECT_TRUE(output.good());
+    // Paimon DeletionFile.length is magic + bitmap length, excluding the leading length field.
+    return static_cast<int64_t>(total_length);
+}
+
+TFileScanRangeParams make_local_parquet_scan_params() {
+    TFileScanRangeParams params; // Describes a Parquet scan over FILE_LOCAL storage.
+    params.__set_format_type(TFileFormatType::FORMAT_PARQUET);
+    params.__set_file_type(TFileType::FILE_LOCAL);
+    return params;
+}
+
+std::shared_ptr<io::IOContext> make_io_context(io::FileReaderStats* file_reader_stats,
+                                               io::FileCacheStatistics* file_cache_stats) {
+    auto context = std::make_shared<io::IOContext>();
+    context->file_cache_stats = file_cache_stats; // Caller's pointers are stored as-is.
+    context->file_reader_stats = file_reader_stats;
+    return context;
+}
+
+SplitReadOptions build_split_options(const std::string& file_path) {
+    const auto on_disk_size = std::filesystem::file_size(file_path); // Throws if missing.
+    SplitReadOptions split;
+    split.current_range.__set_path(file_path);
+    split.current_range.__set_file_size(static_cast<int64_t>(on_disk_size));
+    return split;
+}
+
+TTableFormatFileDesc make_paimon_table_format_desc(const std::string& deletion_file_path,
+                                                   int64_t offset, int64_t length) {
+    TPaimonDeletionFileDesc deletion_desc;
+    deletion_desc.__set_length(length);
+    deletion_desc.__set_offset(offset);
+    deletion_desc.__set_path(deletion_file_path);
+    TPaimonFileDesc paimon_desc;
+    paimon_desc.__set_file_format("parquet");
+    paimon_desc.__set_deletion_file(deletion_desc);
+    TTableFormatFileDesc result; // table_format_type is not set by this helper.
+    result.__set_paimon_params(paimon_desc);
+    return result;
+}
+
+TTableFormatFileDesc make_paimon_schema_table_format_desc(int64_t schema_id) {
+    TPaimonFileDesc paimon_desc;
+    paimon_desc.__set_schema_id(schema_id);
+    paimon_desc.__set_file_format("parquet");
+    TTableFormatFileDesc result;
+    result.__set_paimon_params(paimon_desc);
+    result.__set_table_format_type("paimon");
+    return result;
+}
+
+TFileRangeDesc make_paimon_native_range(TFileFormatType::type format_type) {
+    const bool is_orc = format_type == TFileFormatType::FORMAT_ORC; // Otherwise Parquet.
+    TPaimonFileDesc paimon_desc;
+    paimon_desc.__set_file_format(is_orc ? "orc" : "parquet");
+    paimon_desc.__set_reader_type(TPaimonReaderType::PAIMON_NATIVE);
+    TTableFormatFileDesc format_desc;
+    format_desc.__set_table_format_type("paimon");
+    format_desc.__set_paimon_params(paimon_desc);
+    TFileRangeDesc result;
+    result.__set_path(is_orc ? "s3://bucket/native.orc" : "s3://bucket/native.parquet");
+    result.__set_format_type(format_type);
+    result.__set_table_format_params(format_desc);
+    return result;
+}
+
+TFileRangeDesc make_paimon_jni_range() {
+    TPaimonFileDesc paimon_desc;
+    paimon_desc.__set_reader_type(TPaimonReaderType::PAIMON_JNI);
+    paimon_desc.__set_paimon_split("serialized-paimon-split");
+    paimon_desc.__set_file_format("parquet");
+    TTableFormatFileDesc format_desc;
+    format_desc.__set_table_format_type("paimon");
+    format_desc.__set_paimon_params(paimon_desc);
+    TFileRangeDesc result; // Path and split payload are fixed literal strings.
+    result.__set_format_type(TFileFormatType::FORMAT_JNI);
+    result.__set_path("/data-placeholder.parquet");
+    result.__set_table_format_params(format_desc);
+    return result;
+}
+
+TFileRangeDesc make_paimon_range_without_reader_type(TFileFormatType::type format_type) {
+    auto desc = make_paimon_native_range(format_type);
+    desc.table_format_params.paimon_params.__isset.reader_type = false; // isset flag only.
+    return desc;
+}
+
+// Scenario: PaimonReader shares Hudi's history-schema annotation path. A split whose schema id
+// resolves to a historical schema should use field-id mapping and annotate array/map children so
+// TableColumnMapper can match evolved physical Parquet columns by id instead of by the old names.
+TEST(PaimonReaderTest, AnnotatesArrayAndMapFileSchemaFromSplitHistorySchema) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(
+                    100,
+                    {external_array_field("old_tags", 30,
+                                          external_schema_field("old_item", 31, {"tag"}), {"tags"}),
+                     external_map_field(
+                             "old_props", 40, external_schema_field("old_key", 41, {"key"}),
+                             external_schema_field("old_value", 42, {"score"}), {"props"})}),
+            external_schema(
+                    200, {external_schema_field("tags", 30), external_schema_field("props", 40)}),
+    });
+    // Current schema is 200; the split prepared below refers to historical schema 100.
+    paimon::PaimonReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_table_format_params(
+            make_paimon_schema_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID);
+    // The file schema below uses the historical names with small positional ids (0 and 1).
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto int_type = std::make_shared<DataTypeInt32>();
+
+    auto tags = make_file_column(0, "old_tags", std::make_shared<DataTypeArray>(string_type));
+    tags.children = {make_file_column(0, "old_item", string_type)};
+
+    auto props =
+            make_file_column(1, "old_props", std::make_shared<DataTypeMap>(string_type, int_type));
+    props.children = {make_file_column(0, "old_key", string_type),
+                      make_file_column(1, "old_value", int_type)};
+
+    std::vector<ColumnDefinition> file_schema {tags, props};
+    ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok());
+    // After annotation every column must carry the field id and aliases from schema 100.
+    ASSERT_EQ(file_schema.size(), 2);
+    EXPECT_EQ(file_schema[0].get_identifier_field_id(), 30);
+    EXPECT_EQ(file_schema[0].name_mapping, std::vector<std::string>({"tags"}));
+    ASSERT_EQ(file_schema[0].children.size(), 1);
+    EXPECT_EQ(file_schema[0].children[0].get_identifier_field_id(), 31);
+    EXPECT_EQ(file_schema[0].children[0].name_mapping, std::vector<std::string>({"tag"}));
+
+    EXPECT_EQ(file_schema[1].get_identifier_field_id(), 40);
+    EXPECT_EQ(file_schema[1].name_mapping, std::vector<std::string>({"props"}));
+    ASSERT_EQ(file_schema[1].children.size(), 2);
+    EXPECT_EQ(file_schema[1].children[0].get_identifier_field_id(), 41);
+    EXPECT_EQ(file_schema[1].children[0].name_mapping, std::vector<std::string>({"key"}));
+    EXPECT_EQ(file_schema[1].children[1].get_identifier_field_id(), 42);
+    EXPECT_EQ(file_schema[1].children[1].name_mapping, std::vector<std::string>({"score"}));
+}
+
+// Scenario: when FE does not send a matching historical schema for the split schema id, Paimon must
+// stay on BY_NAME mapping and must not rewrite the file schema identifiers.
+TEST(PaimonReaderTest, FallsBackToByNameWhenSplitHistorySchemaIsMissing) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(200, {external_schema_field("name", 10)}),
+    });
+    // Only schema 200 is provided, while the split prepared below asks for schema 100.
+    paimon::PaimonReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_table_format_params(
+            make_paimon_schema_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME);
+    // The file column keeps the id it was created with (0) and gains no name mapping.
+    std::vector<ColumnDefinition> file_schema {
+            make_file_column(0, "old_name", std::make_shared<DataTypeString>()),
+    };
+    ASSERT_TRUE(reader.TEST_annotate_file_schema(&file_schema).ok());
+    EXPECT_EQ(file_schema[0].get_identifier_field_id(), 0);
+    EXPECT_TRUE(file_schema[0].name_mapping.empty());
+}
+
+// Scenario: PaimonReader must clear the previous split schema id before reading a new split. A
+// schema-evolved split must not force the following split without schema id to keep BY_FIELD_ID.
+TEST(PaimonReaderTest, ResetsSplitSchemaIdBeforePreparingNextSplit) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    scan_params.__set_history_schema_info({
+            external_schema(100, {external_schema_field("old_name", 10, {"name"})}),
+            external_schema(200, {external_schema_field("name", 10)}),
+    });
+
+    paimon::PaimonReader reader;
+    reader.TEST_set_scan_params(&scan_params);
+    // First split: schema id 100 is present in the history, so BY_FIELD_ID is expected.
+    SplitReadOptions split_with_schema_id;
+    split_with_schema_id.current_range.__set_table_format_params(
+            make_paimon_schema_table_format_desc(100));
+    ASSERT_TRUE(reader.prepare_split(split_with_schema_id).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_FIELD_ID);
+    // Second split: paimon_params is default-constructed, so no schema id is set on it.
+    SplitReadOptions split_without_schema_id;
+    TTableFormatFileDesc table_format_params;
+    table_format_params.__set_table_format_type("paimon");
+    table_format_params.__set_paimon_params(TPaimonFileDesc {});
+    split_without_schema_id.current_range.__set_table_format_params(table_format_params);
+    ASSERT_TRUE(reader.prepare_split(split_without_schema_id).ok());
+    EXPECT_EQ(reader.TEST_mapping_mode(), TableColumnMappingMode::BY_NAME);
+}
+
+// Scenario: Paimon reader should parse its bitmap deletion vector and let TableReader apply the
+// generated row-position delete predicate before returning table rows.
+TEST(PaimonReaderTest, AppliesBitmapDeletionVectorFile) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_paimon_deletion_vector_file_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Five rows with ids 1..5; the deletion vector marks row positions 0 and 4.
+    const auto file_path = (test_dir / "split.parquet").string();
+    const auto dv_path = (test_dir / "delete-vector.bin").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"});
+    const auto dv_length = write_paimon_deletion_vector_file(dv_path, {0, 4});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    ShardedKVCache cache(1);
+    paimon::PaimonReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+    // The deletion file is referenced from offset 0 with the length returned by the writer.
+    auto split_options = build_split_options(file_path);
+    split_options.cache = &cache;
+    split_options.current_range.__set_table_format_params(
+            make_paimon_table_format_desc(dv_path, 0, dv_length));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Drain the reader, skipping empty blocks, and collect every returned id.
+    std::vector<int32_t> ids;
+    bool eos = false;
+    while (!eos) {
+        Block block = build_table_block(projected_columns);
+        ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+        if (block.rows() == 0) {
+            continue;
+        }
+        const auto& id_column =
+                assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+        for (size_t row = 0; row < block.rows(); ++row) {
+            ids.push_back(id_column.get_element(row));
+        }
+    }
+    EXPECT_EQ(ids, std::vector<int32_t>({2, 3, 4}));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(PaimonHybridReaderTest, ClassifiesJniSplitByReaderType) { // FORMAT_JNI alone is not enough.
+    EXPECT_FALSE(paimon::PaimonHybridReader::TEST_is_jni_split(
+            make_paimon_native_range(TFileFormatType::FORMAT_PARQUET)));
+    EXPECT_FALSE(paimon::PaimonHybridReader::TEST_is_jni_split(
+            make_paimon_range_without_reader_type(TFileFormatType::FORMAT_JNI)));
+    EXPECT_TRUE(paimon::PaimonHybridReader::TEST_is_jni_split(make_paimon_jni_range()));
+}
+
+TEST(PaimonHybridReaderTest, ConvertsNativeSplitFileFormat) {
+    FileFormat file_format;
+    ASSERT_TRUE(paimon::PaimonHybridReader::TEST_to_file_format(
+                        make_paimon_native_range(TFileFormatType::FORMAT_PARQUET), &file_format)
+                        .ok());
+    EXPECT_EQ(file_format, FileFormat::PARQUET);
+
+    ASSERT_TRUE(paimon::PaimonHybridReader::TEST_to_file_format(
+                        make_paimon_native_range(TFileFormatType::FORMAT_ORC), &file_format)
+                        .ok());
+    EXPECT_EQ(file_format, FileFormat::ORC);
+    // A FORMAT_JNI range must be rejected with the "Unsupported native ..." error below.
+    auto status =
+            paimon::PaimonHybridReader::TEST_to_file_format(make_paimon_jni_range(), &file_format);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(std::string::npos, status.to_string().find("Unsupported native Paimon file format"));
+}
+
+TEST(PaimonHybridReaderTest, DispatchesNativeThenJniSplitToMatchingReader) {
+    RuntimeProfile profile("test_profile");
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto scan_params = make_local_parquet_scan_params();
+    io::FileReaderStats file_reader_stats;
+    io::FileCacheStatistics file_cache_stats;
+    auto io_ctx = make_io_context(&file_reader_stats, &file_cache_stats);
+    // No columns are projected; the test only calls prepare_split() and close().
+    paimon::PaimonHybridReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = {},
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = &scan_params,
+                                    .io_ctx = io_ctx,
+                                    .runtime_state = &state,
+                                    .scanner_profile = &profile,
+                            })
+                        .ok());
+    // Step 1: a native Parquet split must be accepted.
+    SplitReadOptions native_split;
+    native_split.current_range = make_paimon_native_range(TFileFormatType::FORMAT_PARQUET);
+    native_split.current_split_format = FileFormat::PARQUET;
+    ASSERT_TRUE(reader.prepare_split(native_split).ok());
+    // The JNI split must fail with "missing serialized_table" (presumably from the JNI path).
+    SplitReadOptions jni_split;
+    jni_split.current_range = make_paimon_jni_range();
+    jni_split.current_split_format = FileFormat::JNI;
+    auto status = reader.prepare_split(jni_split);
+    EXPECT_FALSE(status.ok());
+    EXPECT_NE(std::string::npos, status.to_string().find("missing serialized_table"));
+
+    ASSERT_TRUE(reader.close().ok());
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/table/remote_doris_reader_test.cpp b/be/test/format_v2/table/remote_doris_reader_test.cpp
new file mode 100644
index 00000000000000..b17f82f505c2c9
--- /dev/null
+++ b/be/test/format_v2/table/remote_doris_reader_test.cpp
@@ -0,0 +1,470 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table/remote_doris_reader.h"
+
+#include <arrow/api.h>
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+#include "common/object_pool.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/vexpr.h"
+#include "exprs/vexpr_context.h"
+#include "format_v2/file_reader.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/file_factory.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+#include "testutil/desc_tbl_builder.h"
+
+namespace doris::format::remote_doris {
+namespace {
+
+// Test double for RemoteDorisStream that replays a fixed list of Arrow record batches.
+class BatchRemoteDorisStream final : public RemoteDorisStream {
+public:
+    BatchRemoteDorisStream(std::vector<std::shared_ptr<arrow::RecordBatch>> batches,
+                           std::shared_ptr<int> close_count)
+            : _batches(std::move(batches)), _close_count(std::move(close_count)) {}
+
+    // Hands out the batches in order; once they are used up, *batch is set to nullptr.
+    Status next(std::shared_ptr<arrow::RecordBatch>* batch) override {
+        DORIS_CHECK(batch != nullptr);
+        const bool exhausted = _next_batch >= _batches.size();
+        *batch = exhausted ? nullptr : _batches[_next_batch++];
+        return Status::OK();
+    }
+
+    // Bumps the counter handed to the constructor so callers can observe each close().
+    Status close() override {
+        *_close_count += 1;
+        return Status::OK();
+    }
+
+private:
+    std::vector<std::shared_ptr<arrow::RecordBatch>> _batches;
+    std::shared_ptr<int> _close_count;
+    size_t _next_batch = 0;
+};
+
+// Builds an Arrow-format file range whose table-format params carry a remote Doris descriptor.
+TFileRangeDesc remote_doris_range() {
+    TRemoteDorisFileDesc doris_params;
+    doris_params.__set_ticket("ticket-bytes");
+    doris_params.__set_location_uri("grpc://127.0.0.1:9050");
+    TTableFormatFileDesc format_params;
+    format_params.__set_remote_doris_params(std::move(doris_params));
+    format_params.__set_table_format_type("remote_doris");
+
+    TFileRangeDesc file_range;
+    file_range.__set_path("/dummyPath");
+    file_range.__set_format_type(TFileFormatType::FORMAT_ARROW);
+    file_range.__set_table_format_params(std::move(format_params));
+    return file_range;
+}
+
+std::vector<SlotDescriptor*> remote_slots(ObjectPool* pool, DescriptorTbl** desc_tbl) {
+    DescriptorTblBuilder table_builder(pool);
+    auto&& tuple_builder = table_builder.declare_tuple();
+    tuple_builder << std::make_tuple(std::make_shared<DataTypeInt32>(), std::string("id"));
+    tuple_builder << std::make_tuple(std::make_shared<DataTypeString>(), std::string("name"));
+    *desc_tbl = table_builder.build();
+    return (*desc_tbl)->get_tuple_descriptor(0)->slots();
+}
+
+TSlotDescriptor remote_complex_slot_descriptor(int id, const DataTypePtr& type,
+                                               const std::string& name) {
+    TSlotDescriptor thrift_slot;
+    thrift_slot.__set_colName(name);
+    thrift_slot.__set_slotType(type->to_thrift());
+    thrift_slot.__set_parent(0); // parent is fixed to 0 for every slot built here
+    thrift_slot.__set_is_key(false);
+    thrift_slot.__set_isMaterialized(true);
+    thrift_slot.__set_byteOffset(0);
+    thrift_slot.__set_id(id); // `id` doubles as slot id, index, column position and unique id
+    thrift_slot.__set_slotIdx(id);
+    thrift_slot.__set_columnPos(id);
+    thrift_slot.__set_col_unique_id(id);
+    thrift_slot.__set_nullIndicatorByte(id / 8);
+    thrift_slot.__set_nullIndicatorBit(id % 8);
+    return thrift_slot;
+}
+
+std::vector<SlotDescriptor*> remote_complex_slots(ObjectPool* pool, DescriptorTbl** desc_tbl) {
+    // Nullable leaf types reused by the nested array, map and struct types.
+    const auto nullable_str = make_nullable(std::make_shared<DataTypeString>());
+    const auto nullable_i32 = make_nullable(std::make_shared<DataTypeInt32>());
+    const auto nullable_f32 = make_nullable(std::make_shared<DataTypeFloat32>());
+    const auto array_type = make_nullable(std::make_shared<DataTypeArray>(nullable_str));
+    const auto map_type = make_nullable(std::make_shared<DataTypeMap>(nullable_str, nullable_i32));
+    const auto struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_i32, nullable_f32, nullable_str}, Strings {"f1", "f2", "f3"}));
+    // One tuple descriptor (id 0) followed by the three complex-typed slots.
+    TTupleDescriptor tuple_desc;
+    tuple_desc.__set_id(0);
+    tuple_desc.__set_byteSize(0);
+    tuple_desc.__set_numNullBytes(1);
+    TDescriptorTable thrift_tbl;
+    thrift_tbl.tupleDescriptors.push_back(std::move(tuple_desc));
+    auto& thrift_slots = thrift_tbl.slotDescriptors;
+    thrift_slots.push_back(remote_complex_slot_descriptor(0, array_type, "c_array_s"));
+    thrift_slots.push_back(remote_complex_slot_descriptor(1, map_type, "c_map"));
+    thrift_slots.push_back(remote_complex_slot_descriptor(2, struct_type, "c_struct"));
+    auto status = DescriptorTbl::create(pool, thrift_tbl, desc_tbl);
+    EXPECT_TRUE(status.ok()) << status;
+    return (*desc_tbl)->get_tuple_descriptor(0)->slots();
+}
+
+// Builds a two-row Arrow batch with one column per entry of `names`, in the given order.
+// "name" becomes a utf8 column ("alice", "bob"); "id" and every other name become an int32
+// column (10, 20).
+std::shared_ptr<arrow::RecordBatch> make_batch(const std::vector<std::string>& names) {
+    arrow::Int32Builder int_values;
+    EXPECT_TRUE(int_values.Append(10).ok());
+    EXPECT_TRUE(int_values.Append(20).ok());
+    std::shared_ptr<arrow::Array> int32_column;
+    EXPECT_TRUE(int_values.Finish(&int32_column).ok());
+
+    arrow::StringBuilder string_values;
+    EXPECT_TRUE(string_values.Append("alice").ok());
+    EXPECT_TRUE(string_values.Append("bob").ok());
+    std::shared_ptr<arrow::Array> utf8_column;
+    EXPECT_TRUE(string_values.Finish(&utf8_column).ok());
+
+    std::vector<std::shared_ptr<arrow::Field>> fields;
+    std::vector<std::shared_ptr<arrow::Array>> columns;
+    for (const auto& name : names) {
+        // Both vectors grow in lockstep so field i always describes column i; any name other
+        // than "name" (including unknown ones) is backed by the int32 values.
+        const bool is_string = name == "name";
+        fields.push_back(arrow::field(name, is_string ? arrow::utf8() : arrow::int32()));
+        columns.push_back(is_string ? utf8_column : int32_column);
+    }
+
+    // The row count is fixed at 2 to match the two values appended to each builder.
+    return arrow::RecordBatch::Make(arrow::schema(std::move(fields)), 2, std::move(columns));
+}
+
+// Creates a RemoteDorisFileReader whose stream factory serves `batches` from memory.
+std::unique_ptr<RemoteDorisFileReader> create_reader(
+        RuntimeProfile* profile, const TFileRangeDesc& range,
+        const std::vector<SlotDescriptor*>& slots,
+        std::vector<std::shared_ptr<arrow::RecordBatch>> batches, std::shared_ptr<int> close_count,
+        std::shared_ptr<io::IOContext> io_ctx = nullptr) {
+    auto fs_props = std::make_shared<io::FileSystemProperties>();
+    auto file_desc = std::make_unique<io::FileDescription>();
+    file_desc->path = "/dummyPath";
+    auto factory = [batches = std::move(batches), close_count](
+                           const TFileRangeDesc&,
+                           std::unique_ptr<RemoteDorisStream>* stream) mutable {
+        *stream = std::make_unique<BatchRemoteDorisStream>(std::move(batches), close_count);
+        return Status::OK();
+    };
+    return std::make_unique<RemoteDorisFileReader>(fs_props, file_desc, std::move(io_ctx), profile,
+                                                   range, slots, std::move(factory));
+}
+
+// Builds a block with one freshly created column per entry of `local_ids`, found in `schema`.
+Block make_request_block(const std::vector<ColumnDefinition>& schema,
+                         const std::vector<int32_t>& local_ids) {
+    Block request_block;
+    for (const auto wanted_id : local_ids) {
+        const auto matches = [&](const auto& def) { return def.local_id == wanted_id; };
+        const auto found = std::find_if(schema.begin(), schema.end(), matches);
+        DORIS_CHECK(found != schema.end());
+        request_block.insert({found->type->create_column(), found->type, found->name});
+    }
+    return request_block;
+}
+
+int32_t nullable_int_at(const IColumn& column, size_t row) {
+    // Reads the nested Int32 value at `row`; the null map is not consulted.
+    const auto& inner = assert_cast<const ColumnNullable&>(column).get_nested_column();
+    return assert_cast<const ColumnInt32&>(inner).get_data()[row];
+}
+
+std::string nullable_string_at(const IColumn& column, size_t row) {
+    // Reads the nested string value at `row`; the null map is not consulted.
+    const auto& inner = assert_cast<const ColumnNullable&>(column).get_nested_column();
+    return assert_cast<const ColumnString&>(inner).get_data_at(row).to_string();
+}
+
+// Test predicate: the nullable Int32 column at _block_position is non-null and > _value.
+class NullableIntGreaterThanExpr final : public VExpr {
+public:
+    NullableIntGreaterThanExpr(size_t block_position, int32_t value)
+            : VExpr(std::make_shared<DataTypeUInt8>(), false),
+              _block_position(block_position),
+              _value(value) {}
+
+    const std::string& expr_name() const override { return _name; }
+    bool is_constant() const override { return false; }
+
+    // Writes `count` UInt8 flags; with a selector, output row i tests input row (*selector)[i].
+    Status execute_column_impl(VExprContext*, const Block* block, const Selector* selector,
+                               size_t count, ColumnPtr& result_column) const override {
+        DORIS_CHECK(block != nullptr);
+        const auto& input = block->get_by_position(_block_position).column;
+        const auto& nullable_input = assert_cast<const ColumnNullable&>(*input);
+        const auto& values = assert_cast<const ColumnInt32&>(nullable_input.get_nested_column());
+        auto flags = ColumnUInt8::create();
+        flags->get_data().resize(count);
+        for (size_t out_row = 0; out_row < count; ++out_row) {
+            const auto in_row = selector == nullptr ? out_row : (*selector)[out_row];
+            const bool passes =
+                    !nullable_input.is_null_at(in_row) && values.get_element(in_row) > _value;
+            flags->get_data()[out_row] = passes;
+        }
+        result_column = std::move(flags);
+        return Status::OK();
+    }
+
+    Status clone_node(VExprSPtr* cloned_expr) const override {
+        DORIS_CHECK(cloned_expr != nullptr);
+        *cloned_expr = std::make_shared<NullableIntGreaterThanExpr>(_block_position, _value);
+        return Status::OK();
+    }
+
+private:
+    size_t _block_position;
+    int32_t _value;
+    const std::string _name = "NullableIntGreaterThanExpr";
+};
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto conjunct_ctx = VExprContext::create_shared(expr);
+    const auto prepare_status = conjunct_ctx->prepare(state, RowDescriptor());
+    EXPECT_TRUE(prepare_status.ok()) << prepare_status; // non-fatal: open() still runs
+    const auto open_status = conjunct_ctx->open(state);
+    EXPECT_TRUE(open_status.ok()) << open_status;
+    return conjunct_ctx;
+}
+
+} // namespace
+
+TEST(RemoteDorisV2ReaderTest, BuildsSchemaFromSlotsAndProjectsRequestedColumns) {
+    ObjectPool pool;
+    DescriptorTbl* desc_tbl = nullptr;
+    const auto slots = remote_slots(&pool, &desc_tbl);
+    RuntimeState state;
+    RuntimeProfile profile("remote_doris_v2_reader_test");
+    auto close_count = std::make_shared<int>(0);
+    auto reader = create_reader(&profile, remote_doris_range(), slots, {make_batch({"id", "name"})},
+                                close_count);
+    ASSERT_TRUE(reader->init(&state).ok());
+    // The reader's schema is expected to list the slots in order, with local ids 0 and 1.
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 2);
+    EXPECT_EQ(schema[0].name, "id");
+    EXPECT_EQ(schema[0].local_id, 0);
+    EXPECT_EQ(schema[1].name, "name");
+    EXPECT_EQ(schema[1].local_id, 1);
+    // Project only local column 1 ("name").
+    auto request = std::make_shared<FileScanRequest>();
+    FileScanRequestBuilder builder(request.get());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok());
+    ASSERT_TRUE(reader->open(request).ok());
+    // The first read returns both rows of the single batch and is not yet EOF.
+    auto block = make_request_block(schema, {1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_FALSE(eof);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice");
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 1), "bob");
+    // The second read reports EOF with no rows; close() must reach the stream exactly once.
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    EXPECT_EQ(rows, 0);
+    EXPECT_TRUE(eof);
+    ASSERT_TRUE(reader->close().ok());
+    EXPECT_EQ(*close_count, 1);
+}
+
+TEST(RemoteDorisV2ReaderTest, BuildsComplexSchemaChildrenFromSlots) {
+    ObjectPool pool;
+    DescriptorTbl* desc_tbl = nullptr;
+    const auto slots = remote_complex_slots(&pool, &desc_tbl);
+    RuntimeState state;
+    RuntimeProfile profile("remote_doris_v2_reader_complex_schema_test");
+    auto close_count = std::make_shared<int>(0);
+    auto reader = create_reader(&profile, remote_doris_range(), slots, {}, close_count);
+    ASSERT_TRUE(reader->init(&state).ok());
+    // The stream factory gets no batches: only the schema is inspected here.
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    ASSERT_EQ(schema.size(), 3);
+    // Array: a single leaf child named "element".
+    ASSERT_EQ(schema[0].name, "c_array_s");
+    ASSERT_EQ(schema[0].children.size(), 1);
+    EXPECT_EQ(schema[0].children[0].name, "element");
+    EXPECT_EQ(schema[0].children[0].local_id, 0);
+    EXPECT_TRUE(schema[0].children[0].children.empty());
+    // Map: children "key" and "value" with local ids 0 and 1.
+    ASSERT_EQ(schema[1].name, "c_map");
+    ASSERT_EQ(schema[1].children.size(), 2);
+    EXPECT_EQ(schema[1].children[0].name, "key");
+    EXPECT_EQ(schema[1].children[0].local_id, 0);
+    EXPECT_EQ(schema[1].children[1].name, "value");
+    EXPECT_EQ(schema[1].children[1].local_id, 1);
+    // Struct: one child per field, numbered in declaration order.
+    ASSERT_EQ(schema[2].name, "c_struct");
+    ASSERT_EQ(schema[2].children.size(), 3);
+    EXPECT_EQ(schema[2].children[0].name, "f1");
+    EXPECT_EQ(schema[2].children[0].local_id, 0);
+    EXPECT_EQ(schema[2].children[1].name, "f2");
+    EXPECT_EQ(schema[2].children[1].local_id, 1);
+    EXPECT_EQ(schema[2].children[2].name, "f3");
+    EXPECT_EQ(schema[2].children[2].local_id, 2);
+}
+
+TEST(RemoteDorisV2ReaderTest, HandlesDifferentArrowColumnOrder) {
+    ObjectPool pool;
+    DescriptorTbl* desc_tbl = nullptr;
+    const auto slots = remote_slots(&pool, &desc_tbl);
+    RuntimeState state;
+    RuntimeProfile profile("remote_doris_v2_reader_reordered_test");
+    auto close_count = std::make_shared<int>(0);
+    auto reader = create_reader(&profile, remote_doris_range(), slots, {make_batch({"name", "id"})},
+                                close_count);
+    ASSERT_TRUE(reader->init(&state).ok());
+    // The batch arrives as (name, id) while the slots are declared as (id, name).
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<FileScanRequest>();
+    FileScanRequestBuilder builder(request.get());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(0)).ok());
+    ASSERT_TRUE(reader->open(request).ok());
+    // Output follows the request order (local ids 1, then 0), not the Arrow column order.
+    auto block = make_request_block(schema, {1, 0});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 2);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(0).column, 0), "alice");
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(1).column, 1), 20);
+}
+
+TEST(RemoteDorisV2ReaderTest, AppliesConjunctsAndTracksPredicateFilteredRows) {
+    ObjectPool pool;
+    DescriptorTbl* desc_tbl = nullptr;
+    const auto slots = remote_slots(&pool, &desc_tbl);
+    RuntimeState state;
+    RuntimeProfile profile("remote_doris_v2_reader_filter_test");
+    auto close_count = std::make_shared<int>(0);
+    auto io_ctx = std::make_shared<io::IOContext>();
+    auto reader = create_reader(&profile, remote_doris_range(), slots, {make_batch({"id", "name"})},
+                                close_count, io_ctx);
+    ASSERT_TRUE(reader->init(&state).ok());
+    // Column 0 ("id") is the predicate column; the conjunct keeps rows where id > 10.
+    std::vector<ColumnDefinition> schema;
+    ASSERT_TRUE(reader->get_schema(&schema).ok());
+    auto request = std::make_shared<FileScanRequest>();
+    FileScanRequestBuilder builder(request.get());
+    ASSERT_TRUE(builder.add_predicate_column(LocalColumnId(0)).ok());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok());
+    request->conjuncts = {
+            prepared_conjunct(&state, std::make_shared<NullableIntGreaterThanExpr>(0, 10))};
+    ASSERT_TRUE(reader->open(request).ok());
+    // Of the rows (10, "alice") and (20, "bob") only the second passes the filter.
+    auto block = make_request_block(schema, {0, 1});
+    size_t rows = 0;
+    bool eof = false;
+    ASSERT_TRUE(reader->get_block(&block, &rows, &eof).ok());
+    ASSERT_EQ(rows, 1);
+    EXPECT_FALSE(eof);
+    EXPECT_EQ(nullable_int_at(*block.get_by_position(0).column, 0), 20);
+    EXPECT_EQ(nullable_string_at(*block.get_by_position(1).column, 0), "bob");
+    EXPECT_EQ(io_ctx->predicate_filtered_rows, 1);
+}
+
+TEST(RemoteDorisV2ReaderTest, RejectsUnknownReturnedColumnAndMissingRequestedColumn) {
+    ObjectPool pool;
+    DescriptorTbl* desc_tbl = nullptr;
+    const auto slots = remote_slots(&pool, &desc_tbl);
+    RuntimeState state;
+    RuntimeProfile profile("remote_doris_v2_reader_error_test");
+    // Case 1: the stream returns a column name that is not in the slot schema.
+    {
+        auto close_count = std::make_shared<int>(0);
+        auto reader = create_reader(&profile, remote_doris_range(), slots,
+                                    {make_batch({"unknown"})}, close_count);
+        ASSERT_TRUE(reader->init(&state).ok());
+        std::vector<ColumnDefinition> schema;
+        ASSERT_TRUE(reader->get_schema(&schema).ok());
+        auto request = std::make_shared<FileScanRequest>();
+        FileScanRequestBuilder builder(request.get());
+        ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(0)).ok());
+        ASSERT_TRUE(reader->open(request).ok());
+        auto block = make_request_block(schema, {0});
+        size_t rows = 0;
+        bool eof = false;
+        EXPECT_FALSE(reader->get_block(&block, &rows, &eof).ok());
+    }
+    // Case 2: the stream returns only "id" although local column 1 ("name") was requested.
+    {
+        auto close_count = std::make_shared<int>(0);
+        auto reader = create_reader(&profile, remote_doris_range(), slots, {make_batch({"id"})},
+                                    close_count);
+        ASSERT_TRUE(reader->init(&state).ok());
+        std::vector<ColumnDefinition> schema;
+        ASSERT_TRUE(reader->get_schema(&schema).ok());
+        auto request = std::make_shared<FileScanRequest>();
+        FileScanRequestBuilder builder(request.get());
+        ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok());
+        ASSERT_TRUE(reader->open(request).ok());
+        auto block = make_request_block(schema, {1});
+        size_t rows = 0;
+        bool eof = false;
+        EXPECT_FALSE(reader->get_block(&block, &rows, &eof).ok());
+    }
+}
+
+TEST(RemoteDorisV2ReaderTest, RejectsInvalidRemoteDorisRange) {
+    ObjectPool pool;
+    DescriptorTbl* desc_tbl = nullptr;
+    const auto slots = remote_slots(&pool, &desc_tbl);
+    RuntimeState state;
+    RuntimeProfile profile("remote_doris_v2_reader_bad_range_test");
+    auto range = remote_doris_range();
+    range.table_format_params.__isset.remote_doris_params = false; // mark params as unset
+    auto close_count = std::make_shared<int>(0);
+    auto reader = create_reader(&profile, range, slots, {}, close_count);
+    EXPECT_FALSE(reader->init(&state).ok());
+}
+
+} // namespace doris::format::remote_doris
diff --git a/be/test/format_v2/table_reader_request_test.cpp b/be/test/format_v2/table_reader_request_test.cpp
new file mode 100644
index 00000000000000..3845e086cea1b1
--- /dev/null
+++ b/be/test/format_v2/table_reader_request_test.cpp
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "format_v2/table_reader.h"
+
+namespace doris::format {
+namespace {
+
+// Re-declares TableReader::_append_file_scan_column with public access for the tests.
+struct TableReaderRequestTestHelper final : public TableReader {
+    using TableReader::_append_file_scan_column;
+};
+
+// Scenario: FileScanRequestBuilder owns request-local block positions and merges repeated nested
+// projections for the same root. ColumnMapper can focus on producing file-local projection trees.
+TEST(FileScanRequestBuilderTest, MergesNestedProjectionAndKeepsStableBlockPosition) {
+    FileScanRequest request;
+    FileScanRequestBuilder builder(&request);
+    // First partial projection of root 5: child 2.
+    auto name_projection = LocalColumnIndex::partial_local(5);
+    name_projection.children.push_back(LocalColumnIndex::local(2));
+    ASSERT_TRUE(builder.add_non_predicate_column(std::move(name_projection)).ok());
+    // Second partial projection of the same root: child 0.
+    auto id_projection = LocalColumnIndex::partial_local(5);
+    id_projection.children.push_back(LocalColumnIndex::local(0));
+    ASSERT_TRUE(builder.add_non_predicate_column(std::move(id_projection)).ok());
+    // Both requests collapse into one entry at block position 0; children come out as 0, 2.
+    ASSERT_EQ(request.local_positions.size(), 1);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(5)).value(), 0);
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    const auto& projection = request.non_predicate_columns[0];
+    EXPECT_EQ(projection.column_id(), LocalColumnId(5));
+    ASSERT_FALSE(projection.project_all_children);
+    ASSERT_EQ(projection.children.size(), 2);
+    EXPECT_EQ(projection.children[0].local_id(), 0);
+    EXPECT_EQ(projection.children[1].local_id(), 2);
+}
+
+// Scenario: predicate scan columns dominate non-predicate columns because file readers return
+// predicate columns in the same file-local block and TableReader can reuse them for output.
+TEST(FileScanRequestBuilderTest, PredicateColumnRemovesDuplicateNonPredicateColumn) {
+    FileScanRequest request;
+    FileScanRequestBuilder builder(&request);
+    // Column 1 is first requested as non-predicate, then again as predicate.
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(1)).ok());
+    ASSERT_TRUE(builder.add_non_predicate_column(LocalColumnId(2)).ok());
+    ASSERT_TRUE(builder.add_predicate_column(LocalColumnId(1)).ok());
+    // Block positions stay as first assigned; column 1 now appears only as a predicate column.
+    ASSERT_EQ(request.local_positions.size(), 2);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(1)).value(), 0);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(2)).value(), 1);
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1));
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(2));
+}
+
+// Scenario: TableReader's format-specific customization path delegates to FileScanRequestBuilder
+// and preserves the same predicate/non-predicate de-duplication rule.
+TEST(TableReaderRequestTest, AppendPredicateColumnKeepsOtherNonPredicateColumns) {
+    TableReaderRequestTestHelper reader;
+    FileScanRequest request;
+    // Columns 1 and 2 go in as non-predicate; column 1 is then appended again as predicate.
+    reader._append_file_scan_column(&request, LocalColumnId(1), &request.non_predicate_columns);
+    reader._append_file_scan_column(&request, LocalColumnId(2), &request.non_predicate_columns);
+    reader._append_file_scan_column(&request, LocalColumnId(1), &request.predicate_columns);
+    // Positions are expected in first-seen order: column 1 at 0, column 2 at 1.
+    ASSERT_EQ(request.local_positions.size(), 2);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(1)).value(), 0);
+    EXPECT_EQ(request.local_positions.at(LocalColumnId(2)).value(), 1);
+
+    ASSERT_EQ(request.predicate_columns.size(), 1);
+    EXPECT_EQ(request.predicate_columns[0].column_id(), LocalColumnId(1));
+
+    ASSERT_EQ(request.non_predicate_columns.size(), 1);
+    EXPECT_EQ(request.non_predicate_columns[0].column_id(), LocalColumnId(2));
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/be/test/format_v2/table_reader_test.cpp b/be/test/format_v2/table_reader_test.cpp
new file mode 100644
index 00000000000000..a8659667688d4c
--- /dev/null
+++ b/be/test/format_v2/table_reader_test.cpp
@@ -0,0 +1,3826 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "format_v2/table_reader.h"
+
+#include <arrow/api.h>
+#include <arrow/io/api.h>
+#include <gtest/gtest.h>
+#include <parquet/api/reader.h>
+#include <parquet/arrow/writer.h>
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <optional>
+#include <string>
+#include <typeinfo>
+#include <vector>
+
+#include "common/consts.h"
+#include "core/assert_cast.h"
+#include "core/block/block.h"
+#include "core/column/column_array.h"
+#include "core/column/column_const.h"
+#include "core/column/column_map.h"
+#include "core/column/column_nullable.h"
+#include "core/column/column_string.h"
+#include "core/column/column_struct.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_array.h"
+#include "core/data_type/data_type_map.h"
+#include "core/data_type/data_type_nullable.h"
+#include "core/data_type/data_type_number.h"
+#include "core/data_type/data_type_string.h"
+#include "core/data_type/data_type_struct.h"
+#include "exprs/runtime_filter_expr.h"
+#include "exprs/vectorized_fn_call.h"
+#include "exprs/vexpr.h"
+#include "exprs/vliteral.h"
+#include "exprs/vslot_ref.h"
+#include "gen_cpp/Exprs_types.h"
+#include "gen_cpp/ExternalTableSchema_types.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "io/io_common.h"
+#include "runtime/runtime_profile.h"
+#include "runtime/runtime_state.h"
+#include "storage/predicate/predicate_creator.h"
+#include "storage/segment/condition_cache.h"
+
+namespace doris::format {
+namespace {
+
+// Collects the `index` member of every projection into a vector, preserving the input order.
+// Used by the tests to compare a projection's children against an expected id list.
+std::vector<int32_t> projection_ids(const std::vector<LocalColumnIndex>& projections) {
+    std::vector<int32_t> ids(projections.size());
+    std::transform(projections.begin(), projections.end(), ids.begin(),
+                   [](const LocalColumnIndex& projection) { return projection.index; });
+    return ids;
+}
+
+TEST(LocalColumnIndexTest, MergeUnionsPartialChildrenAndFullProjectionDominates) {
+    LocalColumnIndex target {.index = 10, .project_all_children = false};
+    target.children.push_back({.index = 1});
+    target.children.push_back({.index = 2, .project_all_children = false});
+    target.children.back().children.push_back({.index = 20});
+    // The source overlaps the target on child 2 (grandchild 21) and adds child 3.
+    LocalColumnIndex source {.index = 10, .project_all_children = false};
+    source.children.push_back({.index = 2, .project_all_children = false});
+    source.children.back().children.push_back({.index = 21});
+    source.children.push_back({.index = 3});
+    // Merging two partial projections unions the children, recursively for child 2.
+    ASSERT_TRUE(merge_local_column_index(&target, source).ok());
+    ASSERT_FALSE(target.project_all_children);
+    ASSERT_EQ(std::vector<int32_t>({1, 2, 3}), projection_ids(target.children));
+    ASSERT_FALSE(target.children[1].project_all_children);
+    ASSERT_EQ(std::vector<int32_t>({20, 21}), projection_ids(target.children[1].children));
+    ASSERT_TRUE(target.children[2].project_all_children);
+    // A source that leaves project_all_children unset then replaces the partial children.
+    LocalColumnIndex full_source {.index = 10};
+    ASSERT_TRUE(merge_local_column_index(&target, full_source).ok());
+    ASSERT_TRUE(target.project_all_children);
+    ASSERT_TRUE(target.children.empty());
+}
+
+TEST(LocalColumnIndexTest, FindsProjectedChildren) {
+    LocalColumnIndex projection {.index = 10, .project_all_children = false};
+    projection.children.push_back({.index = 1});
+    projection.children.push_back({.index = 2});
+    // A null projection counts as a full projection in which every child is projected.
+    EXPECT_TRUE(is_full_projection(nullptr));
+    EXPECT_FALSE(is_full_projection(&projection));
+    EXPECT_TRUE(is_partial_projection(&projection));
+    ASSERT_NE(find_child_projection(&projection, 2), nullptr);
+    EXPECT_EQ(find_child_projection(&projection, 2)->local_id(), 2);
+    EXPECT_EQ(find_child_projection(&projection, 3), nullptr);
+    EXPECT_TRUE(is_child_projected(nullptr, 3));
+    EXPECT_TRUE(is_child_projected(&projection, 1));
+    EXPECT_FALSE(is_child_projected(&projection, 3));
+}
+
+TEST(LocalColumnIndexTest, ProjectColumnDefinitionMatchesChildrenByLocalId) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(5);
+    field.name = "root";
+    field.type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    ColumnDefinition a_child;
+    a_child.identifier = Field::create_field<TYPE_INT>(10);
+    a_child.local_id = 0;
+    a_child.name = "a";
+    a_child.type = int_type;
+    ColumnDefinition b_child;
+    b_child.identifier = Field::create_field<TYPE_INT>(20);
+    b_child.local_id = 1;
+    b_child.name = "b";
+    b_child.type = string_type;
+    field.children = {
+            a_child,
+            b_child,
+    };
+    LocalColumnIndex projection {.index = 5, .project_all_children = false};
+    projection.children.push_back({.index = 1});
+    // Only the child with local id 1 ("b", field id 20) is requested, so "a" must be dropped.
+    ColumnDefinition projected_field;
+    ASSERT_TRUE(project_column_definition(field, projection, &projected_field).ok());
+    ASSERT_EQ(projected_field.children.size(), 1);
+    EXPECT_EQ(projected_field.children[0].get_identifier_field_id(), 20);
+    EXPECT_EQ(projected_field.children[0].name, "b");
+    // The projected struct type must shrink to match the surviving child.
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(projected_field.type).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 1);
+    EXPECT_EQ(projected_type->get_element_name(0), "b");
+    EXPECT_TRUE(projected_type->get_element(0)->equals(*string_type));
+}
+
+TEST(LocalColumnIndexTest, ProjectColumnDefinitionKeepsFileChildOrder) {
+    auto int_type = std::make_shared<DataTypeInt32>();
+    auto string_type = std::make_shared<DataTypeString>();
+    ColumnDefinition a_child;
+    a_child.identifier = Field::create_field<TYPE_INT>(10);
+    a_child.local_id = 0;
+    a_child.name = "a";
+    a_child.type = int_type;
+    ColumnDefinition b_child;
+    b_child.identifier = Field::create_field<TYPE_INT>(20);
+    b_child.local_id = 1;
+    b_child.name = "b";
+    b_child.type = string_type;
+
+    ColumnDefinition field;
+    field.identifier = Field::create_field<TYPE_INT>(5);
+    field.name = "root";
+    field.type =
+            std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type}, Strings {"a", "b"});
+    field.children = {a_child, b_child};
+    // The projection lists the children in reverse order (1, then 0).
+    LocalColumnIndex projection {.index = 5, .project_all_children = false};
+    projection.children.push_back({.index = 1});
+    projection.children.push_back({.index = 0});
+    // The result is expected in the file's child order (a, b), not the projection's order.
+    ColumnDefinition projected_field;
+    ASSERT_TRUE(project_column_definition(field, projection, &projected_field).ok());
+    ASSERT_EQ(projected_field.children.size(), 2);
+    EXPECT_EQ(projected_field.children[0].name, "a");
+    EXPECT_EQ(projected_field.children[1].name, "b");
+
+    const auto* projected_type =
+            assert_cast<const DataTypeStruct*>(remove_nullable(projected_field.type).get());
+    ASSERT_EQ(projected_type->get_elements().size(), 2);
+    EXPECT_EQ(projected_type->get_element_name(0), "a");
+    EXPECT_EQ(projected_type->get_element_name(1), "b");
+}
+
+VExprSPtr table_int32_slot_ref(int slot_id, int column_id, const std::string& column_name) {
+    const DataTypePtr slot_type = make_nullable(std::make_shared<DataTypeInt32>());
+    return VSlotRef::create_shared(slot_id, column_id, slot_id, slot_type, column_name);
+}
+
+VExprSPtr table_int32_literal(int32_t value) {
+    auto literal_value = Field::create_field<TYPE_INT>(value);
+    return VLiteral::create_shared(std::make_shared<DataTypeInt32>(), std::move(literal_value));
+}
+
+TExprNode table_function_node(const std::string& function_name, const DataTypePtr& return_type,
+                              const std::vector<DataTypePtr>& arg_types,
+                              TExprNodeType::type node_type,
+                              TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE,
+                              bool short_circuit_evaluation = false) {
+    std::vector<TTypeDesc> arg_type_descs;
+    arg_type_descs.reserve(arg_types.size());
+    for (const auto& arg_type : arg_types) {
+        arg_type_descs.push_back(arg_type->to_thrift());
+    }
+    TFunctionName thrift_name;
+    thrift_name.__set_function_name(function_name);
+    TFunction thrift_fn;
+    thrift_fn.__set_name(thrift_name);
+    thrift_fn.__set_binary_type(TFunctionBinaryType::BUILTIN);
+    thrift_fn.__set_arg_types(arg_type_descs);
+    thrift_fn.__set_ret_type(return_type->to_thrift());
+    thrift_fn.__set_has_var_args(false);
+    // The expression node reuses the return type and declares one child per argument type.
+    TExprNode expr_node;
+    expr_node.__set_fn(thrift_fn);
+    expr_node.__set_node_type(node_type);
+    expr_node.__set_opcode(opcode);
+    expr_node.__set_type(return_type->to_thrift());
+    expr_node.__set_is_nullable(return_type->is_nullable());
+    expr_node.__set_num_children(static_cast<int16_t>(arg_types.size()));
+    if (short_circuit_evaluation) {
+        expr_node.__set_short_circuit_evaluation(true);
+    }
+    return expr_node;
+}
+
+VExprSPtr create_expr_from_node(const TExprNode& node) {
+    VExprSPtr created;
+    const auto create_status = VExpr::create_expr(node, created);
+    DORIS_CHECK(create_status.ok()) << create_status.to_string();
+    return created;
+}
+
+VExprSPtr table_function_expr(const std::string& function_name, const DataTypePtr& return_type,
+                              const std::vector<DataTypePtr>& arg_types,
+                              TExprNodeType::type node_type = TExprNodeType::FUNCTION_CALL,
+                              TExprOpcode::type opcode = TExprOpcode::INVALID_OPCODE) {
+    const auto desc = table_function_node(function_name, return_type, arg_types, node_type, opcode);
+    return VectorizedFnCall::create_shared(desc);
+}
+
+VExprSPtr table_int32_greater_than_expr(int slot_id, int column_id, int32_t value) {
+    // Shape: <nullable INT32 slot "id"> > <INT32 literal>, with a nullable UInt8 return type.
+    const DataTypePtr rhs = std::make_shared<DataTypeInt32>();
+    const DataTypePtr lhs = make_nullable(rhs);
+    auto gt = table_function_expr("gt", make_nullable(std::make_shared<DataTypeUInt8>()),
+                                  {lhs, rhs}, TExprNodeType::BINARY_PRED, TExprOpcode::GT);
+    gt->add_child(table_int32_slot_ref(slot_id, column_id, "id"));
+    gt->add_child(table_int32_literal(value));
+    return gt;
+}
+
+VExprSPtr runtime_filter_wrapper_expr(VExprSPtr impl) {
+    TExprNode desc;
+    desc.__set_num_children(1);
+    desc.__set_type(std::make_shared<DataTypeUInt8>()->to_thrift());
+    desc.__set_node_type(TExprNodeType::SLOT_REF);
+    return RuntimeFilterExpr::create_shared(desc, std::move(impl), 0, false, /*filter_id=*/1);
+}
+
+class NullableArrayBigintDefaultExpr final : public VExpr {
+public:
+    explicit NullableArrayBigintDefaultExpr(DataTypePtr data_type)
+            : _name("single_element_groups") {
+        _data_type = std::move(data_type);
+    }
+
+    const std::string& expr_name() const override { return _name; }
+    bool is_constant() const override { return false; }
+
+    // Emits `count` non-null rows, each a one-element array holding the value 7.
+    Status execute_column_impl(VExprContext*, const Block*, const Selector* selector, size_t count,
+                               ColumnPtr& result_column) const override {
+        DCHECK(selector == nullptr || selector->size() == count);
+        auto elements = ColumnInt64::create();
+        auto array_offsets = ColumnArray::ColumnOffsets::create();
+        auto nulls = ColumnUInt8::create();
+        for (size_t row = 0; row < count; ++row) {
+            elements->insert_value(7);
+            array_offsets->insert_value(static_cast<Int64>(row + 1));
+            nulls->insert_value(0);
+        }
+        auto arrays = ColumnArray::create(std::move(elements), std::move(array_offsets));
+        result_column = ColumnNullable::create(std::move(arrays), std::move(nulls));
+        return Status::OK();
+    }
+
+private:
+    std::string _name;
+};
+
+class TableReaderMaterializeTestHelper final : public TableReader {
+public: // The using-declaration below gives the inherited member public access here.
+    using TableReader::_materialize_map_mapping_column;
+};
+
+VExprSPtr table_int32_sum_expr(int left_slot_id, int left_column_id, int right_slot_id,
+                               int right_column_id) {
+    // Shape: add(<nullable INT32 slot "id">, <nullable INT32 slot "score">); the declared
+    // return type is nullable INT32 as well.
+    const DataTypePtr operand_type = make_nullable(std::make_shared<DataTypeInt32>());
+    auto sum = table_function_expr("add", operand_type, {operand_type, operand_type});
+    sum->add_child(table_int32_slot_ref(left_slot_id, left_column_id, "id"));
+    sum->add_child(table_int32_slot_ref(right_slot_id, right_column_id, "score"));
+    return sum;
+}
+
+VExprSPtr table_int32_sum_greater_than_expr(int left_slot_id, int left_column_id, int right_slot_id,
+                                            int right_column_id, int32_t value) {
+    // Shape: (<left slot> + <right slot>) > <INT32 literal>, with a nullable UInt8 return type.
+    const DataTypePtr rhs = std::make_shared<DataTypeInt32>();
+    const DataTypePtr lhs = make_nullable(rhs);
+    auto gt = table_function_expr("gt", make_nullable(std::make_shared<DataTypeUInt8>()),
+                                  {lhs, rhs}, TExprNodeType::BINARY_PRED, TExprOpcode::GT);
+    auto sum = table_int32_sum_expr(left_slot_id, left_column_id, right_slot_id, right_column_id);
+    gt->add_child(std::move(sum));
+    gt->add_child(table_int32_literal(value));
+    return gt;
+}
+
+VExprSPtr table_condition_function_expr(const std::string& function_name, bool short_circuit) {
+    // "if" takes (condition, then, else); any other name is built as a two-argument function.
+    const bool is_if = function_name == "if";
+    const DataTypePtr int_type = std::make_shared<DataTypeInt32>();
+    const std::vector<DataTypePtr> arg_types =
+            is_if ? std::vector<DataTypePtr> {std::make_shared<DataTypeUInt8>(), int_type, int_type}
+                  : std::vector<DataTypePtr> {int_type, int_type};
+    const auto node =
+            table_function_node(function_name, int_type, arg_types, TExprNodeType::FUNCTION_CALL,
+                                TExprOpcode::INVALID_OPCODE, short_circuit);
+    auto expr = create_expr_from_node(node);
+    if (!is_if) {
+        expr->add_child(table_int32_slot_ref(0, 0, "id"));
+        expr->add_child(table_int32_literal(0));
+        return expr;
+    }
+    expr->add_child(table_int32_greater_than_expr(0, 0, 0));
+    expr->add_child(table_int32_literal(1));
+    expr->add_child(table_int32_literal(0));
+    return expr;
+}
+
+VExprSPtr table_case_expr(bool short_circuit) {
+    // No case operand, an ELSE branch, and three children: (condition, then-value, else-value).
+    TCaseExpr case_desc;
+    case_desc.__set_has_else_expr(true);
+    case_desc.__set_has_case_expr(false);
+
+    TExprNode case_node;
+    case_node.__set_case_expr(case_desc);
+    case_node.__set_num_children(3);
+    case_node.__set_is_nullable(false);
+    case_node.__set_node_type(TExprNodeType::CASE_EXPR);
+    case_node.__set_type(std::make_shared<DataTypeInt32>()->to_thrift());
+    if (short_circuit) {
+        case_node.__set_short_circuit_evaluation(true);
+    }
+
+    auto case_expr = create_expr_from_node(case_node);
+    case_expr->add_child(table_int32_greater_than_expr(0, 0, 0));
+    case_expr->add_child(table_int32_literal(1));
+    case_expr->add_child(table_int32_literal(0));
+    return case_expr;
+}
+
+TEST(CloneTableExprTreeTest, ClonesConditionalExpressions) {
+    const std::vector<VExprSPtr> sources {
+            table_condition_function_expr("if", false),
+            table_condition_function_expr("if", true),
+            table_condition_function_expr("ifnull", false),
+            table_condition_function_expr("ifnull", true),
+            table_condition_function_expr("coalesce", false),
+            table_condition_function_expr("coalesce", true),
+            table_case_expr(false),
+            table_case_expr(true),
+    };
+    // Every clone must be a distinct object with the same dynamic type, name and child count.
+    for (const auto& source : sources) {
+        VExprSPtr clone;
+        const auto status = clone_table_expr_tree(source, &clone);
+        ASSERT_TRUE(status.ok()) << source->debug_string() << ": " << status.to_string();
+        ASSERT_NE(clone, nullptr);
+        const auto* source_ptr = source.get();
+        const auto* clone_ptr = clone.get();
+        EXPECT_TRUE(typeid(*source_ptr) == typeid(*clone_ptr))
+                << source->expr_name() << " cloned as " << typeid(*clone_ptr).name();
+        EXPECT_EQ(source->expr_name(), clone->expr_name());
+        EXPECT_EQ(source->get_num_children(), clone->get_num_children());
+        EXPECT_NE(source_ptr, clone_ptr);
+    }
+}
+
+// Scenario: a VectorizedFnCall with a complex (struct) return type must be cloned without being
+// rebuilt from its TExprNode, because DataTypeFactory rejects nested types on the primitive path.
+TEST(CloneTableExprTreeTest, ClonesVectorizedFnCallWithComplexReturnType) {
+    const auto int32_type = std::make_shared<DataTypeInt32>();
+    const auto utf8_type = std::make_shared<DataTypeString>();
+    const auto element_type = std::make_shared<DataTypeStruct>(DataTypes {int32_type, utf8_type},
+                                                               Strings {"a", "b"});
+    const auto list_type = std::make_shared<DataTypeArray>(element_type);
+
+    auto source = table_function_expr("element_at", element_type, {list_type, int32_type});
+    source->add_child(VSlotRef::create_shared(0, 0, -1, list_type, "array_of_struct"));
+    source->add_child(table_int32_literal(1));
+
+    VExprSPtr clone;
+    const auto status = clone_table_expr_tree(source, &clone);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_NE(clone, nullptr);
+    EXPECT_EQ(clone->expr_name(), source->expr_name());
+    EXPECT_TRUE(clone->data_type()->equals(*element_type));
+    EXPECT_EQ(clone->get_num_children(), 2);
+    EXPECT_NE(clone.get(), source.get());
+}
+
+std::shared_ptr<arrow::Array> finish_array(arrow::ArrayBuilder* builder) {
+    std::shared_ptr<arrow::Array> finished;
+    EXPECT_TRUE(builder->Finish(&finished).ok());
+    return finished;
+}
+
+std::shared_ptr<arrow::Array> build_int32_array(const std::vector<int32_t>& values) {
+    arrow::Int32Builder int_builder;
+    for (size_t i = 0; i < values.size(); ++i) {
+        EXPECT_TRUE(int_builder.Append(values[i]).ok());
+    }
+    return finish_array(&int_builder);
+}
+
+std::shared_ptr<arrow::Array> build_string_array(const std::vector<std::string>& values) {
+    arrow::StringBuilder text_builder;
+    for (size_t i = 0; i < values.size(); ++i) {
+        EXPECT_TRUE(text_builder.Append(values[i]).ok());
+    }
+    return finish_array(&text_builder);
+}
+
+void write_parquet_file(const std::string& file_path, int32_t id, const std::string& value) {
+    auto layout = arrow::schema({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("value", arrow::utf8(), false),
+    });
+    auto data = arrow::Table::Make(layout, {build_int32_array({id}), build_string_array({value})});
+    // A failed open only aborts this helper; nothing is written in that case.
+    auto open_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(open_result.ok()) << open_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *open_result;
+    // Uncompressed Parquet 2.6 with V2 data pages, written with a row-group size of 1.
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*data, arrow::default_memory_pool(), sink, 1,
+                                                      props.build()));
+}
+
+void write_struct_parquet_file(const std::string& file_path, int32_t id) {
+    // Writes a single row whose only column `s` is a struct with one INT32 child `id`.
+    auto row_type = arrow::struct_({arrow::field("id", arrow::int32(), false)});
+    arrow::StructBuilder row_builder(
+            row_type, arrow::default_memory_pool(),
+            {std::make_shared<arrow::Int32Builder>(arrow::default_memory_pool())});
+    auto* id_values = assert_cast<arrow::Int32Builder*>(row_builder.field_builder(0));
+    EXPECT_TRUE(row_builder.Append().ok());
+    EXPECT_TRUE(id_values->Append(id).ok());
+    auto layout = arrow::schema({
+            arrow::field("s", row_type, false),
+    });
+    auto data = arrow::Table::Make(layout, {finish_array(&row_builder)});
+
+    auto open_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(open_result.ok()) << open_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *open_result;
+
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*data, arrow::default_memory_pool(), sink, 1,
+                                                      props.build()));
+}
+
+void write_struct_parquet_file(const std::string& file_path, const std::vector<int32_t>& ids,
+                               int64_t row_group_size = -1) {
+    auto row_type = arrow::struct_({arrow::field("id", arrow::int32(), false)});
+    arrow::StructBuilder row_builder(
+            row_type, arrow::default_memory_pool(),
+            {std::make_shared<arrow::Int32Builder>(arrow::default_memory_pool())});
+    auto* id_values = assert_cast<arrow::Int32Builder*>(row_builder.field_builder(0));
+    for (size_t i = 0; i < ids.size(); ++i) {
+        EXPECT_TRUE(row_builder.Append().ok());
+        EXPECT_TRUE(id_values->Append(ids[i]).ok());
+    }
+
+    auto layout = arrow::schema({
+            arrow::field("s", row_type, false),
+    });
+    auto data = arrow::Table::Make(layout, {finish_array(&row_builder)});
+
+    auto open_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(open_result.ok()) << open_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *open_result;
+
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    // A non-positive row_group_size falls back to the total number of ids.
+    const int64_t rows_per_group =
+            row_group_size > 0 ? row_group_size : static_cast<int64_t>(ids.size());
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*data, arrow::default_memory_pool(), sink,
+                                                      rows_per_group, props.build()));
+}
+
+void write_struct_with_nullable_child_parquet_file(const std::string& file_path) {
+    // Rows written: s = {id: 7, note: "seven"} and s = {id: 8, note: NULL}.
+    auto row_type = arrow::struct_({
+            arrow::field("id", arrow::int32(), false),
+            arrow::field("note", arrow::utf8(), true),
+    });
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> child_builders;
+    child_builders.push_back(std::make_shared<arrow::Int32Builder>());
+    child_builders.push_back(std::make_shared<arrow::StringBuilder>());
+    arrow::StructBuilder row_builder(row_type, arrow::default_memory_pool(),
+                                     std::move(child_builders));
+    auto* id_values = assert_cast<arrow::Int32Builder*>(row_builder.field_builder(0));
+    auto* note_values = assert_cast<arrow::StringBuilder*>(row_builder.field_builder(1));
+    // First row: both children are present.
+    EXPECT_TRUE(row_builder.Append().ok());
+    EXPECT_TRUE(id_values->Append(7).ok());
+    EXPECT_TRUE(note_values->Append("seven").ok());
+    // Second row: the nullable child is NULL.
+    EXPECT_TRUE(row_builder.Append().ok());
+    EXPECT_TRUE(id_values->Append(8).ok());
+    EXPECT_TRUE(note_values->AppendNull().ok());
+
+    auto layout = arrow::schema({
+            arrow::field("s", row_type, false),
+    });
+    auto data = arrow::Table::Make(layout, {finish_array(&row_builder)});
+
+    auto open_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(open_result.ok()) << open_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *open_result;
+
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*data, arrow::default_memory_pool(), sink, 2,
+                                                      props.build()));
+}
+
+void write_list_struct_parquet_file(const std::string& file_path) {
+    // Rows written (structs are {a, b}): [{10, 11}, {20, 21}], [{30, 31}], [{40, 41}].
+    auto element_type = arrow::struct_(
+            {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::int32(), false)});
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> child_builders;
+    child_builders.push_back(std::make_shared<arrow::Int32Builder>());
+    child_builders.push_back(std::make_shared<arrow::Int32Builder>());
+    auto element_builder = std::make_shared<arrow::StructBuilder>(
+            element_type, arrow::default_memory_pool(), std::move(child_builders));
+    auto xs_type = arrow::list(arrow::field("element", element_type, true));
+    arrow::ListBuilder xs_builder(arrow::default_memory_pool(), element_builder, xs_type);
+    auto* a_values = assert_cast<arrow::Int32Builder*>(element_builder->field_builder(0));
+    auto* b_values = assert_cast<arrow::Int32Builder*>(element_builder->field_builder(1));
+
+    // First list holds two elements; the remaining two lists hold one element each.
+    EXPECT_TRUE(xs_builder.Append().ok());
+    EXPECT_TRUE(element_builder->Append().ok());
+    EXPECT_TRUE(a_values->Append(10).ok());
+    EXPECT_TRUE(b_values->Append(11).ok());
+    EXPECT_TRUE(element_builder->Append().ok());
+    EXPECT_TRUE(a_values->Append(20).ok());
+    EXPECT_TRUE(b_values->Append(21).ok());
+
+    EXPECT_TRUE(xs_builder.Append().ok());
+    EXPECT_TRUE(element_builder->Append().ok());
+    EXPECT_TRUE(a_values->Append(30).ok());
+    EXPECT_TRUE(b_values->Append(31).ok());
+
+    EXPECT_TRUE(xs_builder.Append().ok());
+    EXPECT_TRUE(element_builder->Append().ok());
+    EXPECT_TRUE(a_values->Append(40).ok());
+    EXPECT_TRUE(b_values->Append(41).ok());
+
+    auto layout = arrow::schema({
+            arrow::field("xs", xs_type, false),
+    });
+    auto data = arrow::Table::Make(layout, {finish_array(&xs_builder)});
+
+    auto open_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(open_result.ok()) << open_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *open_result;
+
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*data, arrow::default_memory_pool(), sink, 3,
+                                                      props.build()));
+}
+
+void write_map_struct_parquet_file(const std::string& file_path) {
+    // Rows written: {1: {10, "ma"}, 2: {20, "mb"}}, {3: {30, "mc"}}, then one empty value.
+    auto map_keys = std::make_shared<arrow::Int32Builder>();
+    auto entry_type = arrow::struct_(
+            {arrow::field("a", arrow::int32(), false), arrow::field("b", arrow::utf8(), false)});
+    std::vector<std::shared_ptr<arrow::ArrayBuilder>> child_builders;
+    child_builders.push_back(std::make_shared<arrow::Int32Builder>());
+    child_builders.push_back(std::make_shared<arrow::StringBuilder>());
+    auto map_values = std::make_shared<arrow::StructBuilder>(
+            entry_type, arrow::default_memory_pool(), std::move(child_builders));
+    auto kv_type = arrow::map(arrow::int32(), arrow::field("value", entry_type, false));
+    arrow::MapBuilder map_builder(arrow::default_memory_pool(), map_keys, map_values, kv_type);
+    auto* a_field = assert_cast<arrow::Int32Builder*>(map_values->field_builder(0));
+    auto* b_field = assert_cast<arrow::StringBuilder*>(map_values->field_builder(1));
+
+    EXPECT_TRUE(map_builder.Append().ok());
+    EXPECT_TRUE(map_keys->Append(1).ok());
+    EXPECT_TRUE(map_values->Append().ok());
+    EXPECT_TRUE(a_field->Append(10).ok());
+    EXPECT_TRUE(b_field->Append("ma").ok());
+    EXPECT_TRUE(map_keys->Append(2).ok());
+    EXPECT_TRUE(map_values->Append().ok());
+    EXPECT_TRUE(a_field->Append(20).ok());
+    EXPECT_TRUE(b_field->Append("mb").ok());
+
+    EXPECT_TRUE(map_builder.Append().ok());
+    EXPECT_TRUE(map_keys->Append(3).ok());
+    EXPECT_TRUE(map_values->Append().ok());
+    EXPECT_TRUE(a_field->Append(30).ok());
+    EXPECT_TRUE(b_field->Append("mc").ok());
+
+    // Third row: appended through AppendEmptyValue, so no key/value entries are added for it.
+    EXPECT_TRUE(map_builder.AppendEmptyValue().ok());
+
+    auto layout = arrow::schema({
+            arrow::field("kv", kv_type, false),
+    });
+    auto data = arrow::Table::Make(layout, {finish_array(&map_builder)});
+
+    auto open_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(open_result.ok()) << open_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *open_result;
+
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*data, arrow::default_memory_pool(), sink, 3,
+                                                      props.build()));
+}
+
+void write_int_pair_parquet_file(const std::string& file_path, const std::vector<int32_t>& ids,
+                                 const std::vector<int32_t>& scores,
+                                 const std::vector<std::string>& values,
+                                 int64_t row_group_size = -1) {
+    // Each column carries explicit "PARQUET:field_id" metadata: id=0, score=1, value=2.
+    const auto id_field_id = arrow::key_value_metadata({"PARQUET:field_id"}, {"0"});
+    const auto score_field_id = arrow::key_value_metadata({"PARQUET:field_id"}, {"1"});
+    const auto value_field_id = arrow::key_value_metadata({"PARQUET:field_id"}, {"2"});
+    auto layout = arrow::schema({
+            arrow::field("id", arrow::int32(), false)->WithMetadata(id_field_id),
+            arrow::field("score", arrow::int32(), false)->WithMetadata(score_field_id),
+            arrow::field("value", arrow::utf8(), false)->WithMetadata(value_field_id),
+    });
+    auto data = arrow::Table::Make(layout, {build_int32_array(ids), build_int32_array(scores),
+                                            build_string_array(values)});
+    auto open_result = arrow::io::FileOutputStream::Open(file_path);
+    ASSERT_TRUE(open_result.ok()) << open_result.status();
+    std::shared_ptr<arrow::io::FileOutputStream> sink = *open_result;
+
+    ::parquet::WriterProperties::Builder props;
+    props.compression(::parquet::Compression::UNCOMPRESSED);
+    props.data_page_version(::parquet::ParquetDataPageVersion::V2);
+    props.version(::parquet::ParquetVersion::PARQUET_2_6);
+    const int64_t rows_per_group =
+            row_group_size > 0 ? row_group_size : static_cast<int64_t>(ids.size());
+    PARQUET_THROW_NOT_OK(::parquet::arrow::WriteTable(*data, arrow::default_memory_pool(), sink,
+                                                      rows_per_group, props.build()));
+}
+
+Block build_table_block(const std::vector<ColumnDefinition>& columns) {
+    Block table_block;
+    for (size_t i = 0; i < columns.size(); ++i) {
+        table_block.insert({columns[i].type->create_column(), columns[i].type, columns[i].name});
+    }
+    return table_block;
+}
+
+const IColumn& expect_not_null_nullable_nested_column(const IColumn& column) {
+    if (column.is_nullable()) {
+        const auto& wrapper = assert_cast<const ColumnNullable&>(column);
+        for (const auto null_flag : wrapper.get_null_map_data()) {
+            EXPECT_EQ(null_flag, 0);
+        }
+        return wrapper.get_nested_column();
+    }
+    return column; // Not nullable: there is nothing to unwrap or check.
+}
+
+void expect_nullable_column_all_null(const IColumn& column) {
+    const auto materialized = column.convert_to_full_column_if_const();
+    const auto& null_map = assert_cast<const ColumnNullable&>(*materialized).get_null_map_data();
+    for (size_t row = 0; row < null_map.size(); ++row) {
+        EXPECT_EQ(null_map[row], 1);
+    }
+}
+
+const IColumn& expect_not_null_table_column(const Block& block, size_t position) { // by position
+    return expect_not_null_nullable_nested_column(*block.get_by_position(position).column);
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type);
+
+void expect_int32_column_values(const IColumn& column,
+                                const std::vector<int32_t>& expected_values) {
+    const auto materialized = column.convert_to_full_column_if_const();
+    const auto& payload = expect_not_null_nullable_nested_column(*materialized);
+    const auto& actual = assert_cast<const ColumnInt32&>(payload).get_data();
+    ASSERT_EQ(actual.size(), expected_values.size());
+    for (size_t i = 0; i < expected_values.size(); ++i) {
+        EXPECT_EQ(actual[i], expected_values[i]);
+    }
+}
+
+SplitReadOptions build_split_options(const std::string& file_path) {
+    const auto file_bytes = static_cast<int64_t>(std::filesystem::file_size(file_path));
+    SplitReadOptions split;
+    split.current_range.__set_path(file_path);
+    split.current_range.__set_file_size(file_bytes);
+    return split;
+}
+
+void set_table_level_row_count(SplitReadOptions* split_options, int64_t row_count) {
+    split_options->current_range.table_format_params.table_level_row_count = row_count;
+    split_options->current_range.table_format_params.__isset.table_level_row_count = true;
+    split_options->current_range.__isset.table_format_params = true;
+}
+
+int64_t parquet_column_start_offset(const ::parquet::ColumnChunkMetaData& column_metadata) {
+    const bool has_dictionary = column_metadata.has_dictionary_page();
+    return has_dictionary ? static_cast<int64_t>(column_metadata.dictionary_page_offset())
+                          : static_cast<int64_t>(column_metadata.data_page_offset());
+}
+
+SplitReadOptions build_split_options_for_row_group_mid(const std::string& file_path,
+                                                       int row_group_idx) {
+    // The split is a one-byte range starting at the midpoint of the row group's byte span.
+    auto split = build_split_options(file_path);
+    const auto file_reader = ::parquet::ParquetFileReader::OpenFile(file_path, false);
+    const auto file_metadata = file_reader->metadata();
+    const auto row_group = file_metadata->RowGroup(row_group_idx);
+    const auto first_chunk = row_group->ColumnChunk(0);
+    const auto last_chunk = row_group->ColumnChunk(row_group->num_columns() - 1);
+    const int64_t begin_offset = parquet_column_start_offset(*first_chunk);
+    const int64_t end_offset =
+            parquet_column_start_offset(*last_chunk) + last_chunk->total_compressed_size();
+    const int64_t mid_offset = begin_offset + (end_offset - begin_offset) / 2;
+    split.current_range.__set_start_offset(mid_offset);
+    split.current_range.__set_size(1);
+    return split;
+}
+
+DataTypePtr make_table_test_type(const DataTypePtr& type, bool nullable_root = true) {
+    DORIS_CHECK(type != nullptr);
+    // Struct, array and map types are rebuilt recursively so that every nested child becomes
+    // nullable; only the root's nullability is controlled by `nullable_root`.
+    const auto base_type = remove_nullable(type);
+    DataTypePtr rebuilt = base_type; // Any other type is kept as-is.
+    if (const auto* struct_type = typeid_cast<const DataTypeStruct*>(base_type.get())) {
+        DataTypes children;
+        children.reserve(struct_type->get_elements().size());
+        for (const auto& element : struct_type->get_elements()) {
+            children.push_back(make_table_test_type(element));
+        }
+        rebuilt = std::make_shared<DataTypeStruct>(children, struct_type->get_element_names());
+    } else if (const auto* array_type = typeid_cast<const DataTypeArray*>(base_type.get())) {
+        rebuilt = std::make_shared<DataTypeArray>(
+                make_table_test_type(array_type->get_nested_type()));
+    } else if (const auto* map_type = typeid_cast<const DataTypeMap*>(base_type.get())) {
+        rebuilt = std::make_shared<DataTypeMap>(make_table_test_type(map_type->get_key_type()),
+                                                make_table_test_type(map_type->get_value_type()));
+    }
+    return nullable_root ? make_nullable(rebuilt) : rebuilt;
+}
+
+ColumnDefinition make_table_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition definition;
+    definition.name = name;
+    // External table scan descriptors expose nullable columns even when the Parquet file field
+    // is required, so the test schema is made nullable here to match the real scan contract.
+    definition.type = make_table_test_type(type);
+    // A negative id leaves the identifier untouched.
+    if (id >= 0) {
+        definition.identifier = Field::create_field<TYPE_INT>(id);
+    }
+    return definition;
+}
+
+ColumnDefinition make_file_column(int32_t id, const std::string& name, const DataTypePtr& type) {
+    ColumnDefinition definition;
+    definition.name = name;
+    definition.type = make_table_test_type(type);
+    definition.local_id = id;
+    definition.identifier = Field::create_field<TYPE_INT>(id);
+    return definition;
+}
+
+schema::external::TFieldPtr external_schema_field(std::string name, int32_t id,
+                                                  std::vector<std::string> aliases = {}) {
+    auto payload = std::make_shared<schema::external::TField>();
+    payload->__set_id(id);
+    payload->__set_name(std::move(name));
+    if (!aliases.empty()) {
+        payload->__set_name_mapping(std::move(aliases));
+    }
+    schema::external::TFieldPtr holder;
+    holder.__isset.field_ptr = true;
+    holder.field_ptr = std::move(payload);
+    return holder;
+}
+
+schema::external::TFieldPtr external_array_field(std::string name, int32_t id,
+                                                 schema::external::TFieldPtr item_field,
+                                                 std::vector<std::string> aliases = {}) {
+    auto holder = external_schema_field(std::move(name), id, std::move(aliases));
+    schema::external::TArrayField item_wrapper;
+    item_wrapper.__set_item_field(std::move(item_field));
+    holder.field_ptr->__isset.nestedField = true;
+    holder.field_ptr->nestedField.__set_array_field(std::move(item_wrapper));
+    return holder;
+}
+
+schema::external::TFieldPtr external_map_field(std::string name, int32_t id,
+                                               schema::external::TFieldPtr key_field,
+                                               schema::external::TFieldPtr value_field,
+                                               std::vector<std::string> aliases = {}) {
+    schema::external::TMapField map_part;  // Nested payload carrying key and value fields.
+    map_part.__set_value_field(std::move(value_field));
+    map_part.__set_key_field(std::move(key_field));
+    auto result = external_schema_field(std::move(name), id, std::move(aliases));
+    result.field_ptr->__isset.nestedField = true;
+    result.field_ptr->nestedField.__set_map_field(std::move(map_part));
+    return result;
+}
+
+schema::external::TFieldPtr external_struct_field(std::string name, int32_t id,
+                                                  std::vector<schema::external::TFieldPtr> fields,
+                                                  std::vector<std::string> aliases = {}) {
+    schema::external::TStructField struct_part;  // Nested payload carrying the child fields.
+    struct_part.__set_fields(std::move(fields));
+    auto result = external_schema_field(std::move(name), id, std::move(aliases));
+    result.field_ptr->__isset.nestedField = true;
+    result.field_ptr->nestedField.__set_struct_field(std::move(struct_part));
+    return result;
+}
+
+schema::external::TSchema external_schema(int64_t schema_id,
+                                          std::vector<schema::external::TFieldPtr> fields) {
+    schema::external::TSchema result;  // Schema `schema_id` whose root struct owns `fields`.
+    result.__set_schema_id(schema_id);
+    schema::external::TStructField root;
+    root.__set_fields(std::move(fields));
+    result.__set_root_field(std::move(root));
+    return result;
+}
+
+ColumnDefinition make_nullable_column_definition(ColumnDefinition column) {
+    for (size_t i = 0; i < column.children.size(); ++i) {
+        column.children[i] = make_nullable_column_definition(std::move(column.children[i]));
+    }
+    column.type = make_table_test_type(column.type);
+    return column;  // Type rewritten here and, recursively, in every descendant.
+}
+
+MutableColumnPtr make_not_null_nullable_column(MutableColumnPtr nested_column) {
+    auto not_null_flags = ColumnUInt8::create();  // One 0 flag is appended per nested row.
+    for (size_t left = nested_column->size(); left > 0; --left) {
+        not_null_flags->insert_value(0);
+    }
+    return ColumnNullable::create(std::move(nested_column), std::move(not_null_flags));
+}
+
+class TableReaderCharVarcharTestHelper final : public TableReader {
+public:  // Re-declares two inherited TableReader helpers as public so tests can call them.
+    using TableReader::_truncate_char_or_varchar_column;
+    using TableReader::_should_truncate_char_or_varchar_column;
+};
+
+TEST(TableReaderTest, TruncateCharOrVarcharPredicateOnlyAppliesToParquetStringWidthMismatch) {
+    ColumnMapping mapping;  // Each case below changes one side of the mapping and re-checks.
+    mapping.table_type = std::make_shared<DataTypeString>(3, TYPE_VARCHAR);
+    mapping.file_type = std::make_shared<DataTypeString>(10, TYPE_VARCHAR);
+    EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+    // File VARCHAR built with 2 against table VARCHAR built with 3: expected false.
+    mapping.file_type = std::make_shared<DataTypeString>(2, TYPE_VARCHAR);
+    EXPECT_FALSE(
+            TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+    // Default-constructed string file type: expected true.
+    mapping.file_type = std::make_shared<DataTypeString>();
+    EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+    // Int32 file type: expected true.
+    mapping.file_type = std::make_shared<DataTypeInt32>();
+    EXPECT_TRUE(TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+    // Default-constructed string table type (file type is still Int32): expected false.
+    mapping.table_type = std::make_shared<DataTypeString>();
+    EXPECT_FALSE(
+            TableReaderCharVarcharTestHelper::_should_truncate_char_or_varchar_column(mapping));
+}
+
+TEST(TableReaderTest, TruncateCharOrVarcharColumnKeepsNullMap) {
+    auto nested = ColumnString::create();
+    nested->insert_data("abcdef", 6);
+    nested->insert_data("xyz", 3);
+    auto null_map = ColumnUInt8::create();
+    null_map->insert_value(0);
+    null_map->insert_value(1);
+    // Wrap both in a nullable VARCHAR column named "v" inside a one-column block.
+    auto type = make_nullable(std::make_shared<DataTypeString>(3, TYPE_VARCHAR));
+    Block block;
+    block.insert({ColumnNullable::create(std::move(nested), std::move(null_map)), type, "v"});
+    // Truncate block column 0, passing 3 as the last argument.
+    TableReaderCharVarcharTestHelper::_truncate_char_or_varchar_column(&block, 0, 3);
+    // Row 0 must be cut to "abc" and stay non-null; row 1 must still be null.
+    ASSERT_EQ(block.columns(), 1);
+    ASSERT_EQ(block.rows(), 2);
+    const auto* nullable_column =
+            assert_cast<const ColumnNullable*>(block.get_by_position(0).column.get());
+    EXPECT_EQ(nullable_column->get_nested_column().get_data_at(0).to_string(), "abc");
+    EXPECT_FALSE(nullable_column->is_null_at(0));
+    EXPECT_TRUE(nullable_column->is_null_at(1));
+}
+
+void set_name_identifiers(std::vector<ColumnDefinition>* columns);
+
+void set_name_identifier(ColumnDefinition* column) {
+    DORIS_CHECK(column != nullptr);  // Then key this column and all descendants by name.
+    set_name_identifiers(&column->children);
+    column->identifier = Field::create_field<TYPE_STRING>(column->name);
+}
+
+void set_name_identifiers(std::vector<ColumnDefinition>* columns) {
+    DORIS_CHECK(columns != nullptr);  // Applies set_name_identifier() to each entry in order.
+    for (size_t i = 0; i < columns->size(); ++i) {
+        set_name_identifier(&(*columns)[i]);
+    }
+}
+
+void add_column_predicate(TableColumnPredicates* column_predicates, GlobalIndex global_index,
+                          std::shared_ptr<ColumnPredicate> predicate) {
+    DORIS_CHECK(column_predicates != nullptr);  // Null guard, as in set_name_identifiers().
+    (*column_predicates)[global_index].push_back(std::move(predicate));  // Append for this index.
+}
+
+VExprContextSPtr prepared_conjunct(RuntimeState* state, const VExprSPtr& expr) {
+    auto context = VExprContext::create_shared(expr);  // Prepared, then opened, below.
+    const auto prepare_status = context->prepare(state, RowDescriptor());
+    EXPECT_TRUE(prepare_status.ok()) << prepare_status;
+    const auto open_status = context->open(state);
+    EXPECT_TRUE(open_status.ok()) << open_status;
+    return context;  // Returned even when an expectation above failed.
+}
+
+struct FakeFileReaderState {  // Counters and knobs shared between a test and its fake readers.
+    int init_count = 0;                                          // bumped by FakeFileReader::init
+    int open_count = 0;                                          // bumped by FakeFileReader::open
+    int close_count = 0;                                         // bumped by FakeFileReader::close
+    int64_t total_rows = 2;                                      // returned by get_total_rows()
+    bool eof_with_first_batch = true;                            // eof reported with first batch
+    bool inject_delete_conjunct = false;                         // request gets a delete conjunct
+    std::shared_ptr<FileScanRequest> last_request;               // request passed to latest open
+    std::shared_ptr<ConditionCacheContext> condition_cache_ctx;  // set via the reader's setter
+};
+
+class FakeFileReader final : public FileReader {  // In-memory FileReader stand-in for tests.
+public:
+    FakeFileReader(std::shared_ptr<io::FileSystemProperties>& system_properties,
+                   std::unique_ptr<io::FileDescription>& file_description,
+                   std::vector<ColumnDefinition> schema, std::shared_ptr<FakeFileReaderState> state)
+            : FileReader(system_properties, file_description, nullptr, nullptr),
+              _schema(std::move(schema)),
+              _state(std::move(state)) {}
+    // Counts the call in the shared state and clears _eof; the RuntimeState is not used.
+    Status init(RuntimeState* state) override {
+        (void)state;
+        ++_state->init_count;
+        _eof = false;
+        return Status::OK();
+    }
+    // Copies the stored schema, passing each column through make_nullable_column_definition().
+    Status get_schema(std::vector<ColumnDefinition>* file_schema) const override {
+        DORIS_CHECK(file_schema != nullptr);
+        *file_schema = _schema;
+        for (auto& column : *file_schema) {
+            column = make_nullable_column_definition(std::move(column));
+        }
+        return Status::OK();
+    }
+    // Delegates to FileReader::open(), then records the request and re-arms the single batch.
+    Status open(std::shared_ptr<FileScanRequest> request) override {
+        RETURN_IF_ERROR(FileReader::open(std::move(request)));
+        _state->last_request = _request;
+        ++_state->open_count;
+        _returned_batch = false;
+        return Status::OK();
+    }
+    // The first call after open() yields two fixed rows; later calls yield zero rows and eof.
+    Status get_block(Block* file_block, size_t* rows, bool* eof) override {
+        DORIS_CHECK(file_block != nullptr);
+        DORIS_CHECK(rows != nullptr);
+        DORIS_CHECK(eof != nullptr);
+        DORIS_CHECK(_request != nullptr);
+        if (_returned_batch) {
+            *rows = 0;
+            *eof = true;
+            return Status::OK();
+        }
+        // Fill each requested file column (only ids 0, 1 and 2 are known) with two rows.
+        for (const auto& [file_column_id, block_position] : _request->local_positions) {
+            if (file_column_id == LocalColumnId(0)) {
+                auto column = ColumnInt32::create();
+                column->insert_value(1);
+                column->insert_value(2);
+                file_block->replace_by_position(block_position.value(),
+                                                make_not_null_nullable_column(std::move(column)));
+            } else if (file_column_id == LocalColumnId(1)) {
+                auto column = ColumnString::create();
+                column->insert_data("one", 3);
+                column->insert_data("two", 3);
+                file_block->replace_by_position(block_position.value(),
+                                                make_not_null_nullable_column(std::move(column)));
+            } else if (file_column_id == LocalColumnId(2)) {
+                auto country_values = ColumnString::create();
+                country_values->insert_data("USA", 3);
+                country_values->insert_data("UK", 2);
+                auto country_column = make_not_null_nullable_column(std::move(country_values));
+                // Second struct child; wrapped as a non-null nullable column when pushed below.
+                auto city_column = ColumnString::create();
+                city_column->insert_data("New York", 8);
+                city_column->insert_data("London", 6);
+                // Struct children in order: country, then city.
+                MutableColumns struct_children;
+                struct_children.push_back(std::move(country_column));
+                struct_children.push_back(make_not_null_nullable_column(std::move(city_column)));
+                auto struct_column = ColumnStruct::create(std::move(struct_children));
+                // The struct column itself is also wrapped as a non-null nullable column.
+                file_block->replace_by_position(
+                        block_position.value(),
+                        make_not_null_nullable_column(std::move(struct_column)));
+            } else {
+                return Status::InvalidArgument("Unexpected fake file column id {}",
+                                               file_column_id.value());
+            }
+        }
+        // Only one batch is produced; the next call takes the early return above.
+        _returned_batch = true;
+        *rows = 2;
+        *eof = _state->eof_with_first_batch;
+        if (_state->condition_cache_ctx != nullptr && !_state->condition_cache_ctx->is_hit &&
+            _state->condition_cache_ctx->filter_result != nullptr &&
+            !_state->condition_cache_ctx->filter_result->empty()) {
+            // The real file reader marks a granule after local row-level predicates keep at least
+            // one row from that granule. The fake reader does it here so TableReader tests can
+            // focus on condition-cache lifecycle decisions without depending on Parquet internals.
+            (*_state->condition_cache_ctx->filter_result)[0] = true;
+        }
+        return Status::OK();
+    }
+    // Stores the context in the shared state, where get_block() reads it.
+    void set_condition_cache_context(std::shared_ptr<ConditionCacheContext> ctx) override {
+        _state->condition_cache_ctx = std::move(ctx);
+    }
+    // Reports the row count configured in the shared state.
+    int64_t get_total_rows() const override { return _state->total_rows; }
+    // Counts the call, releases the request and sets _eof.
+    Status close() override {
+        ++_state->close_count;
+        _request.reset();
+        _eof = true;
+        return Status::OK();
+    }
+
+private:
+    std::vector<ColumnDefinition> _schema;
+    std::shared_ptr<FakeFileReaderState> _state;
+    bool _returned_batch = false;
+};
+
+class FakeTableReader final : public TableReader {  // TableReader wired to FakeFileReader.
+public:
+    FakeTableReader(std::vector<ColumnDefinition> file_schema,
+                    std::shared_ptr<FakeFileReaderState> state)
+            : _schema(std::move(file_schema)), _fake(std::move(state)) {}
+
+protected:
+    // Hands back a FakeFileReader for a local-file description instead of a real reader.
+    Status create_file_reader(std::unique_ptr<FileReader>* reader) override {
+        DORIS_CHECK(reader != nullptr);
+        auto description = std::make_unique<io::FileDescription>();
+        description->path = "fake-table-reader-input";
+        auto properties = std::make_shared<io::FileSystemProperties>();
+        properties->system_type = TFileType::FILE_LOCAL;
+        *reader = std::make_unique<FakeFileReader>(properties, description, _schema, _fake);
+        return Status::OK();
+    }
+
+    Status customize_file_scan_request(FileScanRequest* file_request) override {
+        RETURN_IF_ERROR(TableReader::customize_file_scan_request(file_request));
+        if (!_fake->inject_delete_conjunct) {
+            return Status::OK();
+        }
+        // In v2, table-format deletes reach the file reader as delete_conjuncts injected by
+        // TableReader. The fake reader never evaluates this one; its mere presence lets tests
+        // check that condition cache is turned off once table-level delete state exists.
+        file_request->delete_conjuncts.push_back(
+                VExprContext::create_shared(table_int32_literal(1)));
+        return Status::OK();
+    }
+
+private:
+    std::vector<ColumnDefinition> _schema;
+    std::shared_ptr<FakeFileReaderState> _fake;
+};
+
+class ScopedConditionCacheForTest {
+public:
+    // Swaps a freshly created cache into ExecEnv; the destructor puts the saved pointer back.
+    ScopedConditionCacheForTest()
+            : _saved(ExecEnv::GetInstance()->get_condition_cache()),
+              _owned(segment_v2::ConditionCache::create_global_cache(1024 * 1024, 4)) {
+        auto* env = ExecEnv::GetInstance();
+        env->_condition_cache = _owned.get();
+    }
+    ~ScopedConditionCacheForTest() { ExecEnv::GetInstance()->_condition_cache = _saved; }
+    segment_v2::ConditionCache* get() { return _owned.get(); }
+
+private:
+    segment_v2::ConditionCache* _saved = nullptr;
+    std::unique_ptr<segment_v2::ConditionCache> _owned;
+};
+
+TEST(TableReaderTest, CanUseInjectedFileReaderForStandaloneUnitTest) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+    file_schema.push_back(make_file_column(1, "value", std::make_shared<DataTypeString>()));
+    // Project the two columns in the opposite order to the file schema: value first, then id.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+    // FakeTableReader builds FakeFileReader instances that share fake_state with this test.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // Open one split; the path matches the one FakeTableReader uses for its file description.
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // First read: the fake reader's single two-row batch; eos is expected to stay false.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_FALSE(eos);
+    // Lifecycle counters and the block positions the reader asked for (value -> 0, id -> 1).
+    ASSERT_EQ(fake_state->init_count, 1);
+    ASSERT_EQ(fake_state->open_count, 1);
+    ASSERT_EQ(fake_state->close_count, 1);
+    ASSERT_NE(fake_state->last_request, nullptr);
+    ASSERT_EQ(fake_state->last_request->local_positions.at(LocalColumnId(1)).value(), 0);
+    ASSERT_EQ(fake_state->last_request->local_positions.at(LocalColumnId(0)).value(), 1);
+    EXPECT_EQ(projection_ids(fake_state->last_request->non_predicate_columns),
+              std::vector<int32_t>({1, 0}));
+    EXPECT_TRUE(fake_state->last_request->predicate_columns.empty());
+    // The output block holds value at position 0 and id at position 1, with two rows.
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 0));
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(block.rows(), 2);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "one");
+    EXPECT_EQ(value_column.get_data_at(1).to_string(), "two");
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 2);
+    // A second read into a fresh block is expected to report eos.
+    block = build_table_block(projected_columns);
+    eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+}
+
+TEST(TableReaderTest, DebugStringCoversReaderStateAndEnumNames) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+    file_schema.push_back(make_file_column(1, "value", std::make_shared<DataTypeString>()));
+    // Same order as the file schema; the id column also carries the alias "legacy_id".
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+    projected_columns[0].name_mapping = {"legacy_id"};
+    set_name_identifiers(&projected_columns);
+    // One GT column predicate registered under global index 0.
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(0),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(0), false));
+    // eof_with_first_batch = false keeps the fake reader from reporting eof on its first batch.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    fake_state->eof_with_first_batch = false;
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = std::make_shared<io::IOContext>(),
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    // Fill in a partition value and the file range details for the split.
+    SplitReadOptions split_options;
+    split_options.partition_values.emplace("dt", Field::create_field<TYPE_STRING>("2026-06-29"));
+    split_options.current_range.__set_path("fake-table-reader-input");
+    split_options.current_range.__set_file_size(64);
+    split_options.current_range.__set_start_offset(7);
+    split_options.current_range.__set_size(11);
+    split_options.current_range.__set_modification_time(13);
+    split_options.current_range.__set_fs_name("local-fs");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Read one batch (no eos expected) before the debug string is taken.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // Spot-check substrings of the debug output for the reader state set up above.
+    const auto debug = reader.debug_string();
+    EXPECT_NE(debug.find("format=PARQUET"), std::string::npos);
+    EXPECT_NE(debug.find("push_down_agg_type=COUNT"), std::string::npos);
+    EXPECT_NE(debug.find("current_file=FileDescription{path=fake-table-reader-input"),
+              std::string::npos);
+    EXPECT_NE(debug.find("partition_values={dt}"), std::string::npos);
+    EXPECT_NE(debug.find("table_filters=[TableFilter{conjunct=VExprContext"), std::string::npos);
+    EXPECT_NE(debug.find("table_column_predicates={0:{predicate_count=1}}"), std::string::npos);
+    EXPECT_NE(debug.find("ColumnDefinition{name=id"), std::string::npos);
+    EXPECT_NE(debug.find("name_mapping=[legacy_id]"), std::string::npos);
+    EXPECT_NE(debug.find("ColumnMapping{global_index=0"), std::string::npos);
+    EXPECT_NE(debug.find("FileBlockColumn{file_column_id=0"), std::string::npos);
+    ASSERT_TRUE(reader.close().ok());
+    // Other FileFormat values: a TableReader initialized with no columns must print each name.
+    const std::vector<FileFormat> formats {FileFormat::ORC,  FileFormat::CSV, FileFormat::JSON,
+                                           FileFormat::TEXT, FileFormat::JNI, FileFormat::NATIVE,
+                                           FileFormat::ARROW};
+    const std::vector<std::string> format_names {"ORC", "CSV",    "JSON", "TEXT",
+                                                 "JNI", "NATIVE", "ARROW"};
+    for (size_t idx = 0; idx < formats.size(); ++idx) {
+        TableReader enum_reader;
+        ASSERT_TRUE(enum_reader
+                            .init({
+                                    .projected_columns = {},
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = formats[idx],
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                            .ok());
+        EXPECT_NE(enum_reader.debug_string().find("format=" + format_names[idx]),
+                  std::string::npos);
+    }
+    // Same check for the push-down aggregation operators other than COUNT.
+    const std::vector<TPushAggOp::type> agg_ops {TPushAggOp::type::NONE, TPushAggOp::type::MINMAX,
+                                                 TPushAggOp::type::MIX,
+                                                 TPushAggOp::type::COUNT_ON_INDEX};
+    const std::vector<std::string> agg_names {"NONE", "MINMAX", "MIX", "COUNT_ON_INDEX"};
+    for (size_t idx = 0; idx < agg_ops.size(); ++idx) {
+        TableReader enum_reader;
+        ASSERT_TRUE(enum_reader
+                            .init({
+                                    .projected_columns = {},
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = agg_ops[idx],
+                            })
+                            .ok());
+        EXPECT_NE(enum_reader.debug_string().find("push_down_agg_type=" + agg_names[idx]),
+                  std::string::npos);
+    }
+}
+
+TEST(TableReaderTest, AnnotateProjectedColumnUsesCurrentHistorySchemaForNestedTypes) {
+    TFileScanRangeParams scan_params;
+    scan_params.__set_current_schema_id(200);
+    // Schema 200 holds "profile" (id 20, alias "user_profile") with an array and a map child.
+    auto profile_field = external_struct_field(
+            "profile", 20,
+            {external_array_field("old_scores", 21, external_schema_field("old_score", 22),
+                                  {"scores"}),
+             external_map_field("old_props", 23, external_schema_field("old_key", 24),
+                                external_schema_field("old_value", 25), {"props"})},
+            {"user_profile"});
+    scan_params.__set_history_schema_info(
+            {external_schema(100, {external_schema_field("ignored_profile", 10)}),
+             external_schema(200, {profile_field})});
+    // Table-side type: a struct with an int array "scores" and a string-to-string map "props".
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    auto scores_type = std::make_shared<DataTypeArray>(int_type);
+    auto props_type = std::make_shared<DataTypeMap>(string_type, string_type);
+    auto profile_type = std::make_shared<DataTypeStruct>(DataTypes {scores_type, props_type},
+                                                         Strings {"scores", "props"});
+    // The table column is created with id -1 (no identifier) and named by the alias.
+    ColumnDefinition profile_column = make_table_column(-1, "user_profile", profile_type);
+    ProjectedColumnBuildContext context;
+    context.scan_params = &scan_params;
+    TFileScanSlotInfo slot_info;
+    TableReader reader;
+    ASSERT_TRUE(reader.annotate_projected_column(slot_info, &context, &profile_column).ok());
+    // Expect ids and names from schema 200; nested children are named element, key and value.
+    EXPECT_EQ(profile_column.get_identifier_field_id(), 20);
+    EXPECT_EQ(profile_column.name_mapping, std::vector<std::string>({"user_profile"}));
+    ASSERT_TRUE(context.schema_column.has_value());
+    ASSERT_EQ(context.schema_column->children.size(), 2);
+    EXPECT_EQ(context.schema_column->children[0].name, "old_scores");
+    EXPECT_EQ(context.schema_column->children[0].get_identifier_field_id(), 21);
+    ASSERT_EQ(context.schema_column->children[0].children.size(), 1);
+    EXPECT_EQ(context.schema_column->children[0].children[0].name, "element");
+    EXPECT_EQ(context.schema_column->children[0].children[0].get_identifier_field_id(), 22);
+    ASSERT_EQ(context.schema_column->children[1].children.size(), 2);
+    EXPECT_EQ(context.schema_column->children[1].name, "old_props");
+    EXPECT_EQ(context.schema_column->children[1].children[0].name, "key");
+    EXPECT_EQ(context.schema_column->children[1].children[0].get_identifier_field_id(), 24);
+    EXPECT_EQ(context.schema_column->children[1].children[1].name, "value");
+    EXPECT_EQ(context.schema_column->children[1].children[1].get_identifier_field_id(), 25);
+}
+
+TEST(TableReaderTest, ComplexRematerializeCastsScalarChildToTableType) {
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto nullable_string_type = make_nullable(string_type);
+    const auto file_struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_string_type, string_type}, Strings {"country", "city"}));
+    auto file_struct_column = make_file_column(2, "struct_column", file_struct_type);
+    file_struct_column.children = {make_file_column(0, "country", nullable_string_type),
+                                   make_file_column(1, "city", string_type)};
+    std::vector<ColumnDefinition> file_schema = {file_struct_column};
+    // The table-side struct type lists both children as nullable strings.
+    const auto table_struct_type = make_nullable(std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_string_type, nullable_string_type}, Strings {"country", "city"}));
+    auto country_child = make_table_column(0, "country", nullable_string_type);
+    auto city_child = make_table_column(1, "city", nullable_string_type);
+    auto table_struct_column = make_table_column(2, "struct_column", table_struct_type);
+    table_struct_column.children = {country_child, city_child};
+    std::vector<ColumnDefinition> projected_columns = {table_struct_column};
+    set_name_identifiers(&projected_columns);
+    // FakeFileReader serves the two-row struct column for file column id 2.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // Open the single fake split.
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Read one batch and require the block's columns to agree with its declared types.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    const auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+    ASSERT_TRUE(block.check_type_and_column().ok()) << block.dump_structure();
+    // Unwrap nullable -> struct -> nullable children, then compare the string values.
+    const auto& result_nullable =
+            assert_cast<const ColumnNullable&>(*block.get_by_position(0).column);
+    const auto& struct_result =
+            assert_cast<const ColumnStruct&>(result_nullable.get_nested_column());
+    ASSERT_EQ(struct_result.get_columns().size(), 2);
+    const auto& country_column = assert_cast<const ColumnNullable&>(struct_result.get_column(0));
+    const auto& city_column = assert_cast<const ColumnNullable&>(struct_result.get_column(1));
+    const auto& country_values =
+            assert_cast<const ColumnString&>(country_column.get_nested_column());
+    const auto& city_values = assert_cast<const ColumnString&>(city_column.get_nested_column());
+    ASSERT_EQ(city_column.size(), 2);
+    EXPECT_FALSE(city_column.is_null_at(0));
+    EXPECT_FALSE(city_column.is_null_at(1));
+    EXPECT_EQ(country_values.get_data_at(0).to_string(), "USA");
+    EXPECT_EQ(country_values.get_data_at(1).to_string(), "UK");
+    EXPECT_EQ(city_values.get_data_at(0).to_string(), "New York");
+    EXPECT_EQ(city_values.get_data_at(1).to_string(), "London");
+}
+
+TEST(TableReaderTest, ReopenSplitAfterClose) {
+    const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Clean up via a destructor: a failing ASSERT_* returns early and would skip a final call.
+    struct TempDirCleanup {
+        std::filesystem::path dir;
+        std::error_code ignored;  // Lets the destructor use the non-throwing remove_all().
+        ~TempDirCleanup() { std::filesystem::remove_all(dir, ignored); }
+    } cleanup {test_dir, {}};
+
+    const std::vector<std::string> file_paths = {(test_dir / "split_1.parquet").string(),
+                                                 (test_dir / "split_2.parquet").string(),
+                                                 (test_dir / "split_3.parquet").string()};
+    write_parquet_file(file_paths[0], 1, "one");
+    write_parquet_file(file_paths[1], 2, "two");
+    write_parquet_file(file_paths[2], 3, "three");
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(1, 1, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    // Scanner lifecycle over three splits: init() once, then prepare_split() -> get_block() ->
+    // close() per split. close() must fully release the previous low-level reader and task state
+    // so the same TableReader can open the next split. The table-level conjunct is rebuilt per
+    // split too: value is projected before id, so the pushed conjunct must be rewritten to the
+    // ParquetReader file-local block position on every open.
+    std::vector<int32_t> ids;
+    std::vector<std::string> values;
+    for (const auto& file_path : file_paths) {
+        auto split_options = build_split_options(file_path);
+        ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+        Block block = build_table_block(projected_columns);
+        bool eos = false;
+        ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+        ASSERT_FALSE(eos);
+
+        const auto& value_column =
+                assert_cast<const ColumnString&>(expect_not_null_table_column(block, 0));
+        const auto& id_column =
+                assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+        ASSERT_EQ(id_column.size(), 1);
+        ASSERT_EQ(value_column.size(), 1);
+        ids.push_back(id_column.get_element(0));
+        values.push_back(value_column.get_data_at(0).to_string());
+
+        ASSERT_TRUE(reader.close().ok());
+    }
+
+    EXPECT_EQ(ids, std::vector<int32_t>({1, 2, 3}));
+    EXPECT_EQ(values, std::vector<std::string>({"one", "two", "three"}));
+}
+
+// Scenario: column predicates are pruning hints only. They do not produce a row-level survivor
+// bitmap, so TableReader must not enable condition cache when the scan request has no conjuncts.
+TEST(TableReaderTest, ConditionCacheSkipsColumnPredicateOnlyRequest) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Table-side projection of the same single Int32 column "id".
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+    // The only filter is a GT column predicate on "id" with operand 0.
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(0),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(0), false));
+    // A condition_cache_digest (7) is supplied even though .conjuncts is empty.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .condition_cache_digest = 7,
+                            })
+                        .ok());
+    // Prepare a single split identified by a placeholder path.
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // After one get_block(), fake_state holds no condition-cache context and no hit is counted.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_EQ(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_EQ(reader.condition_cache_hit_count(), 0);
+    ASSERT_TRUE(reader.close().ok());
+}
+
+// Scenario: runtime filters can arrive late and are not represented by the stable predicate digest.
+// A MISS must not insert a bitmap for `stable predicate AND runtime filter` under the stable digest.
+TEST(TableReaderTest, ConditionCacheSkipsRuntimeFilterConjunct) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Table-side projection of the same single Int32 column "id".
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+    // The only conjunct is a greater-than expr wrapped by runtime_filter_wrapper_expr().
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(
+            reader.init({
+                                .projected_columns = projected_columns,
+                                .column_predicates = {},
+                                .conjuncts = {prepared_conjunct(
+                                        &state, runtime_filter_wrapper_expr(
+                                                        table_int32_greater_than_expr(0, 0, 0)))},
+                                .format = FileFormat::PARQUET,
+                                .scan_params = nullptr,
+                                .io_ctx = nullptr,
+                                .runtime_state = &state,
+                                .scanner_profile = nullptr,
+                                .condition_cache_digest = 7,
+                        })
+                    .ok());
+    // Prepare a single split identified by a placeholder path.
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Expect no condition-cache context in fake_state and a zero hit count after one read.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_EQ(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_EQ(reader.condition_cache_hit_count(), 0);
+    ASSERT_TRUE(reader.close().ok());
+}
+
+// Scenario: table-format delete files/deletion vectors are outside the data-file cache key. When
+// TableReader injects delete conjuncts into the file scan request, condition cache must be disabled
+// for that split.
+TEST(TableReaderTest, ConditionCacheSkipsRequestWithDeleteConjuncts) {
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Table-side projection of the same single Int32 column "id".
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+    // Enable inject_delete_conjunct on the fake state before constructing the reader.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    fake_state->inject_delete_conjunct = true;
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .condition_cache_digest = 7,
+                            })
+                        .ok());
+    // Prepare a single split identified by a placeholder path.
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Despite a regular conjunct and a digest, no cache context or hit may appear.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_EQ(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_EQ(reader.condition_cache_hit_count(), 0);
+    ASSERT_TRUE(reader.close().ok());
+}
+
+// Scenario: a MISS bitmap is safe to publish only after the physical reader reaches EOF. This test
+// returns EOF together with the first batch and verifies TableReader publishes the marked bitmap.
+TEST(TableReaderTest, ConditionCacheMissPublishesBitmapAfterReaderEof) {
+    ScopedConditionCacheForTest cache;
+
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+    // total_rows is exactly one GRANULE_SIZE; eof_with_first_batch is left at its default here.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    fake_state->total_rows = ConditionCacheContext::GRANULE_SIZE;
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .condition_cache_digest = 7,
+                            })
+                        .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // The first read must be a MISS: a cache context exists but is_hit is false.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_NE(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_FALSE(fake_state->condition_cache_ctx->is_hit);
+    // Look up the entry before close(); key carries the split path and, presumably, digest 7.
+    segment_v2::ConditionCache::ExternalCacheKey key("fake-table-reader-input", 0, -1, 7, 0, -1);
+    segment_v2::ConditionCacheHandle handle;
+    ASSERT_TRUE(cache.get()->lookup(key, &handle));
+    const auto cached_bitmap = handle.get_filter_result();
+    ASSERT_NE(cached_bitmap, nullptr);
+    ASSERT_FALSE(cached_bitmap->empty());
+    EXPECT_TRUE((*cached_bitmap)[0]);
+
+    ASSERT_TRUE(reader.close().ok());
+}
+
+// Scenario: LIMIT/cancel can close a reader before it reaches EOF. TableReader must drop the MISS
+// bitmap because unvisited granules would still be false and unsafe for future cache hits.
+TEST(TableReaderTest, ConditionCacheMissIsDroppedWhenReaderClosesBeforeEof) {
+    ScopedConditionCacheForTest cache;
+
+    std::vector<ColumnDefinition> file_schema;
+    file_schema.push_back(make_file_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    set_name_identifiers(&projected_columns);
+    // One granule of rows, with eof_with_first_batch disabled.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    auto fake_state = std::make_shared<FakeFileReaderState>();
+    fake_state->total_rows = ConditionCacheContext::GRANULE_SIZE;
+    fake_state->eof_with_first_batch = false;
+    FakeTableReader reader(file_schema, fake_state);
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 0))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .condition_cache_digest = 7,
+                            })
+                        .ok());
+
+    SplitReadOptions split_options;
+    split_options.current_range.__set_path("fake-table-reader-input");
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // The single read is a MISS (context present, is_hit false).
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_NE(fake_state->condition_cache_ctx, nullptr);
+    EXPECT_FALSE(fake_state->condition_cache_ctx->is_hit);
+    // Closing right after that read must leave nothing cached under the key.
+    ASSERT_TRUE(reader.close().ok());
+    segment_v2::ConditionCache::ExternalCacheKey key("fake-table-reader-input", 0, -1, 7, 0, -1);
+    segment_v2::ConditionCacheHandle handle;
+    EXPECT_FALSE(cache.get()->lookup(key, &handle));
+}
+// Scenario: COUNT push-down over a real Parquet split returns 5 rows in a non-const column.
+TEST(TableReaderTest, PushDownCountFromNewParquetReader) {
+    const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_count_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Five rows are written; the meaning of the trailing 2 is defined by the writer helper.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3, 4, 5}, {10, 20, 30, 40, 50},
+                                {"one", "two", "three", "four", "five"}, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+    // One read returns all 5 rows without eos, and column 0 is not a const column.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 5);
+    EXPECT_FALSE(is_column_const(*block.get_by_position(0).column));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Scenario: with a table-level row count of 5, COUNT emits 2 + 2 + 1 rows at batch_size 2.
+TEST(TableReaderTest, TableLevelCountUsesAssignedRowCount) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_table_count_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // The file itself holds only 3 rows.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Use batch_size 2 so the expected row count spans several blocks.
+    TQueryOptions query_options;
+    query_options.__set_batch_size(2);
+    RuntimeState state {query_options, TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    auto split_options = build_split_options(file_path);
+    set_table_level_row_count(&split_options, 5);
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+    // Expect 2, 2, 1 rows (5 in total, not the file's 3), then an empty block with eos.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.rows(), 2);
+
+    block = build_table_block(projected_columns);
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.rows(), 2);
+
+    block = build_table_block(projected_columns);
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.rows(), 1);
+
+    block = build_table_block(projected_columns);
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.rows(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Scenario: MINMAX push-down yields two rows per projected column: the min, then the max.
+TEST(TableReaderTest, PushDownMinMaxFromNewParquetReader) {
+    const auto test_dir = std::filesystem::temp_directory_path() / "doris_table_reader_minmax_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Values are written unsorted so min/max cannot be taken from the first/last row.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {3, 1, 5, 2}, {30, 10, 50, 20},
+                                {"three", "one", "five", "two"}, 2);
+
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    projected_columns.push_back(make_table_column(1, "score", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+    // Row 0 carries the minimum and row 1 the maximum of each projected column.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    const auto& score_column =
+            assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 5);
+    EXPECT_EQ(score_column.get_element(0), 10);
+    EXPECT_EQ(score_column.get_element(1), 50);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Scenario: MINMAX push-down with "id" projected as Int64 still returns min 1 and max 5.
+TEST(TableReaderTest, PushDownMinMaxCastsFileValueToTableType) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_cast_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {3, 1, 5, 2}, {30, 10, 50, 20},
+                                {"three", "one", "five", "two"}, 2);
+    // The table type is Int64; per the test name the file value must be cast to it.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt64>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+    // Streaming `status` into the assertion prints the error if get_block() fails.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& id_column = assert_cast<const ColumnInt64&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(id_column.get_element(0), 1);
+    EXPECT_EQ(id_column.get_element(1), 5);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Scenario: MINMAX push-down on struct "s" projecting only child "id" returns min 1, max 5.
+TEST(TableReaderTest, PushDownMinMaxFromProjectedStructLeaf) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_struct_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_struct_parquet_file(file_path, {3, 1, 5, 2}, 2);
+    // Project struct column "s" with a single Int32 child "id".
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    auto id_child = make_table_column(0, "id", int_type);
+    auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"id"});
+    auto struct_column = make_table_column(100, "s", struct_type);
+    struct_column.children = {id_child};
+    std::vector<ColumnDefinition> projected_columns = {struct_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+    // The struct's only child holds the min in row 0 and the max in row 1.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& struct_result =
+            assert_cast<const ColumnStruct&>(expect_not_null_table_column(block, 0));
+    ASSERT_EQ(struct_result.get_columns().size(), 1);
+    const auto& ids = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(struct_result.get_column(0)));
+    EXPECT_EQ(ids.get_element(0), 1);
+    EXPECT_EQ(ids.get_element(1), 5);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Scenario: with MINMAX requested on an array-of-struct column, all 3 rows are still read in full.
+TEST(TableReaderTest, PushDownMinMaxFallsBackForProjectedListStructLeaf) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_list_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_list_struct_parquet_file(file_path);
+    // Project the whole "xs" array of nullable struct<a, b> with nullable Int32 fields.
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    auto element_type = std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_int_type, nullable_int_type}, Strings {"a", "b"});
+    auto nullable_element_type = make_nullable(element_type);
+    auto list_column =
+            make_table_column(100, "xs", std::make_shared<DataTypeArray>(nullable_element_type));
+    std::vector<ColumnDefinition> projected_columns = {list_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+    // Cumulative offsets 2, 3, 4: the three rows hold 2, 1 and 1 elements (4 in total).
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status;
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& array_result =
+            assert_cast<const ColumnArray&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(array_result.get_offsets()[0], 2);
+    EXPECT_EQ(array_result.get_offsets()[1], 3);
+    EXPECT_EQ(array_result.get_offsets()[2], 4);
+    const auto& nullable_elements = assert_cast<const ColumnNullable&>(array_result.get_data());
+    for (const auto is_null : nullable_elements.get_null_map_data()) {
+        EXPECT_EQ(is_null, 0);
+    }
+    const auto& element_struct =
+            assert_cast<const ColumnStruct&>(nullable_elements.get_nested_column());
+    ASSERT_EQ(element_struct.get_columns().size(), 2);
+    const auto& a_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(0)));
+    EXPECT_EQ(a_values.get_element(0), 10);
+    EXPECT_EQ(a_values.get_element(1), 20);
+    EXPECT_EQ(a_values.get_element(2), 30);
+    EXPECT_EQ(a_values.get_element(3), 40);
+    const auto& b_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(1)));
+    EXPECT_EQ(b_values.get_element(0), 11);
+    EXPECT_EQ(b_values.get_element(1), 21);
+    EXPECT_EQ(b_values.get_element(2), 31);
+    EXPECT_EQ(b_values.get_element(3), 41);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Scenario: projecting only element child "a" of an array-of-struct yields a one-field struct.
+TEST(TableReaderTest, ProjectedListStructReadsSelectedElementChild) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_list_projection_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_list_struct_parquet_file(file_path);
+    // Projection tree: xs -> element -> a; the struct type lists only field "a".
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    auto a_child = make_table_column(0, "a", int_type);
+    auto element_type = std::make_shared<DataTypeStruct>(DataTypes {int_type}, Strings {"a"});
+    auto nullable_element_type = make_nullable(element_type);
+    auto element_child = make_table_column(0, "element", nullable_element_type);
+    element_child.children = {a_child};
+    auto list_column =
+            make_table_column(100, "xs", std::make_shared<DataTypeArray>(nullable_element_type));
+    list_column.children = {element_child};
+    std::vector<ColumnDefinition> projected_columns = {list_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+    // The element struct must have exactly one child, holding a = 10, 20, 30, 40.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& array_result =
+            assert_cast<const ColumnArray&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(array_result.get_offsets()[0], 2);
+    EXPECT_EQ(array_result.get_offsets()[1], 3);
+    EXPECT_EQ(array_result.get_offsets()[2], 4);
+    const auto& nullable_elements = assert_cast<const ColumnNullable&>(array_result.get_data());
+    const auto& element_struct =
+            assert_cast<const ColumnStruct&>(nullable_elements.get_nested_column());
+    ASSERT_EQ(element_struct.get_columns().size(), 1);
+    const auto& a_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(0)));
+    EXPECT_EQ(a_values.get_element(0), 10);
+    EXPECT_EQ(a_values.get_element(1), 20);
+    EXPECT_EQ(a_values.get_element(2), 30);
+    EXPECT_EQ(a_values.get_element(3), 40);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+// Scenario: element children may be reordered, renamed via name_mapping, or absent in the file.
+TEST(TableReaderTest, ProjectedListStructReordersRenamedAndMissingElementChildren) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_list_schema_evolution_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_list_struct_parquet_file(file_path);
+    // Table order is renamed_b, missing_child, renamed_a; name_mapping points to "b" and "a".
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_int_type = make_nullable(int_type);
+    const auto string_type = std::make_shared<DataTypeString>();
+    auto b_child = make_table_column(1, "renamed_b", nullable_int_type);
+    b_child.name_mapping = {"b"};
+    auto missing_child = make_table_column(99, "missing_child", string_type);
+    auto a_child = make_table_column(0, "renamed_a", nullable_int_type);
+    a_child.name_mapping = {"a"};
+    auto element_type = std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_int_type, string_type, nullable_int_type},
+            Strings {"renamed_b", "missing_child", "renamed_a"});
+    auto nullable_element_type = make_nullable(element_type);
+    auto element_child = make_table_column(0, "element", nullable_element_type);
+    element_child.children = {b_child, missing_child, a_child};
+    auto list_column =
+            make_table_column(100, "xs", std::make_shared<DataTypeArray>(nullable_element_type));
+    list_column.children = {element_child};
+    std::vector<ColumnDefinition> projected_columns = {list_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+    // Expect b, missing, a in table order, with the missing child entirely null.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 3);
+    const auto& array_result =
+            assert_cast<const ColumnArray&>(expect_not_null_table_column(block, 0));
+    EXPECT_EQ(array_result.get_offsets()[0], 2);
+    EXPECT_EQ(array_result.get_offsets()[1], 3);
+    EXPECT_EQ(array_result.get_offsets()[2], 4);
+    const auto& nullable_elements = assert_cast<const ColumnNullable&>(array_result.get_data());
+    const auto& element_struct =
+            assert_cast<const ColumnStruct&>(nullable_elements.get_nested_column());
+    ASSERT_EQ(element_struct.get_columns().size(), 3);
+    const auto& b_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(0)));
+    const auto& missing_values = element_struct.get_column(1);
+    const auto& a_values = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(element_struct.get_column(2)));
+    EXPECT_EQ(b_values.get_element(0), 11);
+    EXPECT_EQ(b_values.get_element(1), 21);
+    EXPECT_EQ(b_values.get_element(2), 31);
+    EXPECT_EQ(b_values.get_element(3), 41);
+    expect_nullable_column_all_null(missing_values);
+    EXPECT_EQ(a_values.get_element(0), 10);
+    EXPECT_EQ(a_values.get_element(1), 20);
+    EXPECT_EQ(a_values.get_element(2), 30);
+    EXPECT_EQ(a_values.get_element(3), 40);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+// Scenario: every projected child of the array-element struct is missing/default-only. The
+// reader must still get a full element projection and materialize the default child safely.
+TEST(TableReaderTest, ProjectedListStructOnlyMissingElementChildFallsBackToFullElement) {
+    const auto work_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_list_only_missing_child_test";
+    std::filesystem::remove_all(work_dir);
+    std::filesystem::create_directories(work_dir);
+    // Generate the list-of-struct Parquet fixture inside the fresh directory.
+    const auto data_file = (work_dir / "split.parquet").string();
+    write_list_struct_parquet_file(data_file);
+    // Requested schema, built bottom-up: xs -> element -> missing_child.
+    const auto str_type = std::make_shared<DataTypeString>();
+    auto absent_field = make_table_column(99, "missing_child", str_type);
+    auto struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {str_type}, Strings {"missing_child"});
+    auto nullable_struct_type = make_nullable(struct_type);
+    auto element_def = make_table_column(0, "element", nullable_struct_type);
+    element_def.children = {absent_field};
+    auto xs_def =
+            make_table_column(100, "xs", std::make_shared<DataTypeArray>(nullable_struct_type));
+    xs_def.children = {element_def};
+    std::vector<ColumnDefinition> requested_columns = {xs_def};
+    // Plain Parquet read: no column predicates and no conjuncts.
+    RuntimeState rt_state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&requested_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = requested_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &rt_state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(data_file)).ok());
+    // Fetch one batch; check the array offsets and that the lone struct child is all NULL.
+    Block chunk = build_table_block(requested_columns);
+    bool done = false;
+    ASSERT_TRUE(reader.get_block(&chunk, &done).ok());
+    ASSERT_FALSE(done);
+    ASSERT_EQ(chunk.rows(), 3);
+    const auto& array_column =
+            assert_cast<const ColumnArray&>(expect_not_null_table_column(chunk, 0));
+    EXPECT_EQ(array_column.get_offsets()[0], 2);
+    EXPECT_EQ(array_column.get_offsets()[1], 3);
+    EXPECT_EQ(array_column.get_offsets()[2], 4);
+    const auto& nullable_items = assert_cast<const ColumnNullable&>(array_column.get_data());
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(nullable_items.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    expect_nullable_column_all_null(struct_column.get_column(0));
+    // Close the reader before deleting the directory that holds its input file.
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(work_dir);
+}
+
+TEST(TableReaderTest, PushDownMinMaxFallsBackForProjectedMapValueStructLeaf) {
+    const auto work_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_map_test";
+    std::filesystem::remove_all(work_dir);
+    std::filesystem::create_directories(work_dir);
+    // Generate the map-of-struct Parquet fixture inside the fresh directory.
+    const auto data_file = (work_dir / "split.parquet").string();
+    write_map_struct_parquet_file(data_file);
+    // Requested schema, built bottom-up: kv -> value -> b.
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto str_type = std::make_shared<DataTypeString>();
+    const auto nullable_str_type = make_nullable(str_type);
+    auto b_field = make_table_column(1, "b", nullable_str_type);
+    auto struct_type =
+            std::make_shared<DataTypeStruct>(DataTypes {nullable_str_type}, Strings {"b"});
+    auto nullable_struct_type = make_nullable(struct_type);
+    auto value_def = make_table_column(1, "value", nullable_struct_type);
+    value_def.children = {b_field};
+    auto kv_def = make_table_column(
+            100, "kv", std::make_shared<DataTypeMap>(int_type, nullable_struct_type));
+    kv_def.children = {value_def};
+    std::vector<ColumnDefinition> requested_columns = {kv_def};
+    // No predicates or conjuncts, but MINMAX aggregate pushdown is requested.
+    RuntimeState rt_state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&requested_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = requested_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &rt_state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(data_file)).ok());
+    // Fetch one batch; regular row data is expected: three map rows, non-null struct values.
+    Block chunk = build_table_block(requested_columns);
+    bool done = false;
+    ASSERT_TRUE(reader.get_block(&chunk, &done).ok());
+    ASSERT_FALSE(done);
+    ASSERT_EQ(chunk.rows(), 3);
+    const auto& kv_column = assert_cast<const ColumnMap&>(expect_not_null_table_column(chunk, 0));
+    EXPECT_EQ(kv_column.get_offsets()[0], 2);
+    EXPECT_EQ(kv_column.get_offsets()[1], 3);
+    EXPECT_EQ(kv_column.get_offsets()[2], 3);
+    const auto& key_ints = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(kv_column.get_keys()));
+    EXPECT_EQ(key_ints.get_element(0), 1);
+    EXPECT_EQ(key_ints.get_element(1), 2);
+    EXPECT_EQ(key_ints.get_element(2), 3);
+    const auto& value_nullable = assert_cast<const ColumnNullable&>(kv_column.get_values());
+    for (const auto null_flag : value_nullable.get_null_map_data()) {
+        EXPECT_EQ(null_flag, 0);
+    }
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(value_nullable.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 1);
+    const auto& b_strings = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(struct_column.get_column(0)));
+    EXPECT_EQ(b_strings.get_data_at(0).to_string(), "ma");
+    EXPECT_EQ(b_strings.get_data_at(1).to_string(), "mb");
+    EXPECT_EQ(b_strings.get_data_at(2).to_string(), "mc");
+    // Close the reader before deleting the directory that holds its input file.
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(work_dir);
+}
+
+TEST(TableReaderTest, ProjectedMapValueStructReordersRenamedAndMissingChildren) {
+    const auto work_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_map_schema_evolution_test";
+    std::filesystem::remove_all(work_dir);
+    std::filesystem::create_directories(work_dir);
+    // Generate the map-of-struct Parquet fixture inside the fresh directory.
+    const auto data_file = (work_dir / "split.parquet").string();
+    write_map_struct_parquet_file(data_file);
+    // Value struct order: renamed_b (name_mapping b), missing_child, renamed_a (name_mapping a).
+    const auto map_key_type = std::make_shared<DataTypeInt32>();
+    const auto i32_type = std::make_shared<DataTypeInt32>();
+    const auto nullable_i32_type = make_nullable(i32_type);
+    const auto str_type = std::make_shared<DataTypeString>();
+    const auto nullable_str_type = make_nullable(str_type);
+    auto renamed_b_field = make_table_column(1, "renamed_b", nullable_str_type);
+    renamed_b_field.name_mapping = {"b"};
+    auto absent_field = make_table_column(99, "missing_child", str_type);
+    auto renamed_a_field = make_table_column(0, "renamed_a", nullable_i32_type);
+    renamed_a_field.name_mapping = {"a"};
+    auto struct_type = std::make_shared<DataTypeStruct>(
+            DataTypes {nullable_str_type, str_type, nullable_i32_type},
+            Strings {"renamed_b", "missing_child", "renamed_a"});
+    auto nullable_struct_type = make_nullable(struct_type);
+    auto value_def = make_table_column(1, "value", nullable_struct_type);
+    value_def.children = {renamed_b_field, absent_field, renamed_a_field};
+    auto kv_def = make_table_column(
+            100, "kv", std::make_shared<DataTypeMap>(map_key_type, nullable_struct_type));
+    kv_def.children = {value_def};
+    std::vector<ColumnDefinition> requested_columns = {kv_def};
+    // Plain Parquet read: no column predicates and no conjuncts.
+    RuntimeState rt_state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&requested_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = requested_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &rt_state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(data_file)).ok());
+    // Fetch one batch; children must appear in requested order with the missing one all NULL.
+    Block chunk = build_table_block(requested_columns);
+    bool done = false;
+    ASSERT_TRUE(reader.get_block(&chunk, &done).ok());
+    ASSERT_FALSE(done);
+    ASSERT_EQ(chunk.rows(), 3);
+    const auto& kv_column = assert_cast<const ColumnMap&>(expect_not_null_table_column(chunk, 0));
+    EXPECT_EQ(kv_column.get_offsets()[0], 2);
+    EXPECT_EQ(kv_column.get_offsets()[1], 3);
+    EXPECT_EQ(kv_column.get_offsets()[2], 3);
+    const auto& key_ints = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(kv_column.get_keys()));
+    EXPECT_EQ(key_ints.get_element(0), 1);
+    EXPECT_EQ(key_ints.get_element(1), 2);
+    EXPECT_EQ(key_ints.get_element(2), 3);
+    const auto& value_nullable = assert_cast<const ColumnNullable&>(kv_column.get_values());
+    const auto& struct_column =
+            assert_cast<const ColumnStruct&>(value_nullable.get_nested_column());
+    ASSERT_EQ(struct_column.get_columns().size(), 3);
+    const auto& b_strings = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(struct_column.get_column(0)));
+    const auto& absent_values = struct_column.get_column(1);
+    const auto& a_ints = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(struct_column.get_column(2)));
+    EXPECT_EQ(b_strings.get_data_at(0).to_string(), "ma");
+    EXPECT_EQ(b_strings.get_data_at(1).to_string(), "mb");
+    EXPECT_EQ(b_strings.get_data_at(2).to_string(), "mc");
+    expect_nullable_column_all_null(absent_values);
+    EXPECT_EQ(a_ints.get_element(0), 10);
+    EXPECT_EQ(a_ints.get_element(1), 20);
+    EXPECT_EQ(a_ints.get_element(2), 30);
+    // Close the reader before deleting the directory that holds its input file.
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(work_dir);
+}
+
+TEST(TableReaderTest, MaterializeMapKeyStructReordersRenamedChildren) {
+    const auto i32_type = std::make_shared<DataTypeInt32>();
+    const auto str_type = std::make_shared<DataTypeString>();
+    const auto file_key_type =
+            std::make_shared<DataTypeStruct>(DataTypes {i32_type, str_type}, Strings {"a", "b"});
+    const auto table_key_type = std::make_shared<DataTypeStruct>(
+            DataTypes {str_type, i32_type}, Strings {"renamed_b", "renamed_a"});
+    const auto file_map_type = std::make_shared<DataTypeMap>(file_key_type, i32_type);
+    const auto table_map_type = std::make_shared<DataTypeMap>(table_key_type, i32_type);
+    // Key child: file field "a" (file-local id 0) maps to table field "renamed_a".
+    ColumnMapping renamed_a_mapping;
+    renamed_a_mapping.file_column_name = "a";
+    renamed_a_mapping.table_column_name = "renamed_a";
+    renamed_a_mapping.file_type = i32_type;
+    renamed_a_mapping.table_type = i32_type;
+    renamed_a_mapping.file_local_id = 0;
+    renamed_a_mapping.is_trivial = true;
+    // Key child: file field "b" (file-local id 1) maps to table field "renamed_b".
+    ColumnMapping renamed_b_mapping;
+    renamed_b_mapping.file_column_name = "b";
+    renamed_b_mapping.table_column_name = "renamed_b";
+    renamed_b_mapping.file_type = str_type;
+    renamed_b_mapping.table_type = str_type;
+    renamed_b_mapping.file_local_id = 1;
+    renamed_b_mapping.is_trivial = true;
+    // Key struct mapping: children are listed in table order (renamed_b, renamed_a).
+    ColumnMapping key_struct_mapping;
+    key_struct_mapping.file_column_name = "key";
+    key_struct_mapping.table_column_name = "key";
+    key_struct_mapping.file_type = file_key_type;
+    key_struct_mapping.table_type = table_key_type;
+    key_struct_mapping.file_local_id = 0;
+    key_struct_mapping.is_trivial = false;
+    key_struct_mapping.child_mappings = {renamed_b_mapping, renamed_a_mapping};
+    // Value mapping: INT on both sides, flagged trivial.
+    ColumnMapping value_int_mapping;
+    value_int_mapping.file_column_name = "value";
+    value_int_mapping.table_column_name = "value";
+    value_int_mapping.file_type = i32_type;
+    value_int_mapping.table_type = i32_type;
+    value_int_mapping.file_local_id = 1;
+    value_int_mapping.is_trivial = true;
+    // Map mapping: combines the key and value mappings; no file_local_id is assigned.
+    ColumnMapping kv_mapping;
+    kv_mapping.file_column_name = "kv";
+    kv_mapping.table_column_name = "kv";
+    kv_mapping.file_type = file_map_type;
+    kv_mapping.table_type = table_map_type;
+    kv_mapping.is_trivial = false;
+    kv_mapping.child_mappings = {key_struct_mapping, value_int_mapping};
+    // File-side key struct in file order (a, b) with three entries.
+    auto a_ints = ColumnInt32::create();
+    a_ints->insert_value(10);
+    a_ints->insert_value(20);
+    a_ints->insert_value(30);
+    auto b_strings = ColumnString::create();
+    b_strings->insert_value("x");
+    b_strings->insert_value("y");
+    b_strings->insert_value("z");
+    MutableColumns key_fields;
+    key_fields.push_back(std::move(a_ints));
+    key_fields.push_back(std::move(b_strings));
+    auto key_struct = ColumnStruct::create(std::move(key_fields));
+    // Three INT values and the offsets 2, 3 complete the file-side map column.
+    auto value_ints = ColumnInt32::create();
+    value_ints->insert_value(100);
+    value_ints->insert_value(200);
+    value_ints->insert_value(300);
+    auto offsets_column = ColumnArray::ColumnOffsets::create();
+    offsets_column->insert_value(2);
+    offsets_column->insert_value(3);
+    ColumnPtr file_kv_map = ColumnMap::create(std::move(key_struct), std::move(value_ints),
+                                              std::move(offsets_column));
+    // Run the map materialization through the test helper.
+    TableReaderMaterializeTestHelper helper;
+    ColumnPtr materialized;
+    ASSERT_TRUE(helper._materialize_map_mapping_column(kv_mapping, file_kv_map, 2, &materialized)
+                        .ok());
+    // Offsets are unchanged and the key struct children now follow table order (b, then a).
+    const auto& out_map = assert_cast<const ColumnMap&>(*materialized);
+    EXPECT_EQ(out_map.get_offsets()[0], 2);
+    EXPECT_EQ(out_map.get_offsets()[1], 3);
+    const auto& out_key = assert_cast<const ColumnStruct&>(out_map.get_keys());
+    ASSERT_EQ(out_key.get_columns().size(), 2);
+    const auto& b_out = assert_cast<const ColumnString&>(out_key.get_column(0));
+    const auto& a_out = assert_cast<const ColumnInt32&>(out_key.get_column(1));
+    EXPECT_EQ(b_out.get_data_at(0).to_string(), "x");
+    EXPECT_EQ(b_out.get_data_at(1).to_string(), "y");
+    EXPECT_EQ(b_out.get_data_at(2).to_string(), "z");
+    EXPECT_EQ(a_out.get_element(0), 10);
+    EXPECT_EQ(a_out.get_element(1), 20);
+    EXPECT_EQ(a_out.get_element(2), 30);
+    // Map values pass through with the same contents.
+    const auto& out_values = assert_cast<const ColumnInt32&>(out_map.get_values());
+    EXPECT_EQ(out_values.get_element(0), 100);
+    EXPECT_EQ(out_values.get_element(1), 200);
+    EXPECT_EQ(out_values.get_element(2), 300);
+}
+
+// Scenario: a map's value struct is materialized in DataTypeStruct field order, even when the
+// ColumnMapping children are supplied in a different order than the projected definitions.
+TEST(TableReaderTest, MaterializeMapValueStructUsesTableTypeOrder) {
+    const auto map_key_type = std::make_shared<DataTypeString>();
+    const auto str_type = std::make_shared<DataTypeString>();
+    const auto file_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {str_type, str_type}, Strings {"full_name", "gender"});
+    const auto table_value_type = std::make_shared<DataTypeStruct>(
+            DataTypes {str_type, str_type}, Strings {"full_name", "gender"});
+    const auto file_map_type = std::make_shared<DataTypeMap>(map_key_type, file_value_type);
+    const auto table_map_type = std::make_shared<DataTypeMap>(map_key_type, table_value_type);
+    // Struct child "full_name": file-local id 0, same name and type on both sides.
+    ColumnMapping full_name_child;
+    full_name_child.file_column_name = "full_name";
+    full_name_child.table_column_name = "full_name";
+    full_name_child.file_type = str_type;
+    full_name_child.table_type = str_type;
+    full_name_child.file_local_id = 0;
+    full_name_child.is_trivial = true;
+    // Struct child "gender": file-local id 1, same name and type on both sides.
+    ColumnMapping gender_child;
+    gender_child.file_column_name = "gender";
+    gender_child.table_column_name = "gender";
+    gender_child.file_type = str_type;
+    gender_child.table_type = str_type;
+    gender_child.file_local_id = 1;
+    gender_child.is_trivial = true;
+    // Value struct mapping lists gender before full_name, the reverse of the struct type order.
+    ColumnMapping value_struct_mapping;
+    value_struct_mapping.file_column_name = "value";
+    value_struct_mapping.table_column_name = "value";
+    value_struct_mapping.file_type = file_value_type;
+    value_struct_mapping.table_type = table_value_type;
+    value_struct_mapping.file_local_id = 1;
+    value_struct_mapping.is_trivial = false;
+    value_struct_mapping.child_mappings = {gender_child, full_name_child};
+    // Key mapping: STRING on both sides, flagged trivial.
+    ColumnMapping key_str_mapping;
+    key_str_mapping.file_column_name = "key";
+    key_str_mapping.table_column_name = "key";
+    key_str_mapping.file_type = map_key_type;
+    key_str_mapping.table_type = map_key_type;
+    key_str_mapping.file_local_id = 0;
+    key_str_mapping.is_trivial = true;
+    // Map mapping: combines the key and value mappings; no file_local_id is assigned.
+    ColumnMapping kv_mapping;
+    kv_mapping.file_column_name = "new_map_column";
+    kv_mapping.table_column_name = "new_map_column";
+    kv_mapping.file_type = file_map_type;
+    kv_mapping.table_type = table_map_type;
+    kv_mapping.is_trivial = false;
+    kv_mapping.child_mappings = {key_str_mapping, value_struct_mapping};
+    // File-side map keys: two entries.
+    auto map_keys = ColumnString::create();
+    map_keys->insert_value("person10");
+    map_keys->insert_value("person20");
+    // File-side value struct children in file order: full_name, then gender.
+    auto full_name_data = ColumnString::create();
+    full_name_data->insert_value("Jack");
+    full_name_data->insert_value("James Lee");
+    auto gender_data = ColumnString::create();
+    gender_data->insert_value("Male");
+    gender_data->insert_value("Male");
+    MutableColumns value_fields;
+    value_fields.push_back(std::move(full_name_data));
+    value_fields.push_back(std::move(gender_data));
+    auto value_struct = ColumnStruct::create(std::move(value_fields));
+    // Offsets 1 and 2 plus the key/value columns form the file-side map column.
+    auto offsets_column = ColumnArray::ColumnOffsets::create();
+    offsets_column->insert_value(1);
+    offsets_column->insert_value(2);
+    ColumnPtr file_kv_map = ColumnMap::create(std::move(map_keys), std::move(value_struct),
+                                              std::move(offsets_column));
+    // Run the map materialization through the test helper.
+    TableReaderMaterializeTestHelper helper;
+    ColumnPtr materialized;
+    ASSERT_TRUE(helper._materialize_map_mapping_column(kv_mapping, file_kv_map, 2, &materialized)
+                        .ok());
+    // Children must follow the table type order: full_name at index 0, gender at index 1.
+    const auto& out_map = assert_cast<const ColumnMap&>(*materialized);
+    const auto& out_struct = assert_cast<const ColumnStruct&>(out_map.get_values());
+    ASSERT_EQ(out_struct.get_columns().size(), 2);
+    const auto& full_name_out = assert_cast<const ColumnString&>(out_struct.get_column(0));
+    const auto& gender_out = assert_cast<const ColumnString&>(out_struct.get_column(1));
+    EXPECT_EQ(full_name_out.get_data_at(0).to_string(), "Jack");
+    EXPECT_EQ(full_name_out.get_data_at(1).to_string(), "James Lee");
+    EXPECT_EQ(gender_out.get_data_at(0).to_string(), "Male");
+    EXPECT_EQ(gender_out.get_data_at(1).to_string(), "Male");
+}
+
+TEST(TableReaderTest, PushDownMinMaxOnlyUsesSelectedRowGroupInFileRange) {
+    const auto work_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_minmax_range_test";
+    std::filesystem::remove_all(work_dir);
+    std::filesystem::create_directories(work_dir);
+    // Fixture ids: {10, 1, 100}. Trailing 1 is presumably the row-group size (TODO confirm).
+    const auto data_file = (work_dir / "split.parquet").string();
+    write_int_pair_parquet_file(data_file, {10, 1, 100}, {100, 10, 1000}, {"ten", "one", "hundred"},
+                                1);
+    // Only the id column is requested.
+    std::vector<ColumnDefinition> requested_columns;
+    requested_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Request MINMAX pushdown on a split built by the *_row_group_mid helper with argument 1.
+    RuntimeState rt_state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&requested_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = requested_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &rt_state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(data_file, 1)).ok());
+    // Expect two rows that both equal 1 (presumably the min and max of the selected row group).
+    Block chunk = build_table_block(requested_columns);
+    bool done = false;
+    ASSERT_TRUE(reader.get_block(&chunk, &done).ok());
+    ASSERT_FALSE(done);
+    ASSERT_EQ(chunk.rows(), 2);
+    const auto& id_values = assert_cast<const ColumnInt32&>(expect_not_null_table_column(chunk, 0));
+    EXPECT_EQ(id_values.get_element(0), 1);
+    EXPECT_EQ(id_values.get_element(1), 1);
+    // Close the reader before deleting the directory that holds its input file.
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(work_dir);
+}
+
+TEST(TableReaderTest, PushDownCountOnlyUsesSelectedRowGroupInFileRange) {
+    const auto work_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_count_range_test";
+    std::filesystem::remove_all(work_dir);
+    std::filesystem::create_directories(work_dir);
+    // Fixture ids: {1, 2, 3}. Trailing 1 is presumably the row-group size (TODO confirm).
+    const auto data_file = (work_dir / "split.parquet").string();
+    write_int_pair_parquet_file(data_file, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1);
+    // Only the id column is requested.
+    std::vector<ColumnDefinition> requested_columns;
+    requested_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Request COUNT pushdown on a split built by the *_row_group_mid helper with argument 2.
+    RuntimeState rt_state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&requested_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = requested_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &rt_state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(data_file, 2)).ok());
+    // A single row is expected; its contents are not inspected.
+    Block chunk = build_table_block(requested_columns);
+    bool done = false;
+    ASSERT_TRUE(reader.get_block(&chunk, &done).ok());
+    ASSERT_FALSE(done);
+    ASSERT_EQ(chunk.rows(), 1);
+    // Close the reader before deleting the directory that holds its input file.
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(work_dir);
+}
+
+TEST(TableReaderTest, PushDownCountFallsBackWithTableConjunct) {
+    const auto work_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_count_conjunct_test";
+    std::filesystem::remove_all(work_dir);
+    std::filesystem::create_directories(work_dir);
+    // Fixture ids: {1, 2, 3}.
+    const auto data_file = (work_dir / "split.parquet").string();
+    write_int_pair_parquet_file(data_file, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"});
+    // Only the id column is requested.
+    std::vector<ColumnDefinition> requested_columns;
+    requested_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // COUNT pushdown is requested together with a table conjunct (apparently id > 2).
+    RuntimeState rt_state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&requested_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = requested_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &rt_state, table_int32_greater_than_expr(0, 0, 2))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &rt_state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(data_file)).ok());
+    // Exactly one row is expected and its id must be 3.
+    Block chunk = build_table_block(requested_columns);
+    bool done = false;
+    ASSERT_TRUE(reader.get_block(&chunk, &done).ok());
+    ASSERT_FALSE(done);
+    ASSERT_EQ(chunk.rows(), 1);
+    const auto& id_values = assert_cast<const ColumnInt32&>(expect_not_null_table_column(chunk, 0));
+    EXPECT_EQ(id_values.get_element(0), 3);
+    // Close the reader before deleting the directory that holds its input file.
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(work_dir);
+}
+
+TEST(TableReaderTest, PushDownCountFallsBackWithColumnPredicate) {
+    const auto work_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_count_predicate_test";
+    std::filesystem::remove_all(work_dir);
+    std::filesystem::create_directories(work_dir);
+    // Fixture ids: {1, 2, 3}. Trailing 1 is presumably the row-group size (TODO confirm).
+    const auto data_file = (work_dir / "split.parquet").string();
+    write_int_pair_parquet_file(data_file, {1, 2, 3}, {10, 20, 30}, {"one", "two", "three"}, 1);
+    // Only the id column is requested.
+    std::vector<ColumnDefinition> requested_columns;
+    requested_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // Column predicate built with PredicateType::GT on "id" and the literal 2.
+    TableColumnPredicates pushed_predicates;
+    add_column_predicate(&pushed_predicates, GlobalIndex(0),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(2), false));
+    // COUNT pushdown is requested together with the column predicate.
+    RuntimeState rt_state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&requested_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = requested_columns,
+                                    .column_predicates = std::move(pushed_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &rt_state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::COUNT,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(data_file)).ok());
+    // Exactly one row is expected and its id must be 3.
+    Block chunk = build_table_block(requested_columns);
+    bool done = false;
+    ASSERT_TRUE(reader.get_block(&chunk, &done).ok());
+    ASSERT_FALSE(done);
+    ASSERT_EQ(chunk.rows(), 1);
+    const auto& id_values = assert_cast<const ColumnInt32&>(expect_not_null_table_column(chunk, 0));
+    EXPECT_EQ(id_values.get_element(0), 3);
+    // Close the reader before deleting the directory that holds its input file.
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(work_dir);
+}
+
+TEST(TableReaderTest, PushDownMinMaxFallsBackWithoutDirectFileMapping) {
+    const auto work_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_minmax_missing_mapping_test";
+    std::filesystem::remove_all(work_dir);
+    std::filesystem::create_directories(work_dir);
+    // Fixture written by write_parquet_file with the arguments (1, "one").
+    const auto data_file = (work_dir / "split.parquet").string();
+    write_parquet_file(data_file, 1, "one");
+    // Requested column (id 99, "missing_id"); per the test name it has no direct file mapping.
+    std::vector<ColumnDefinition> requested_columns;
+    requested_columns.push_back(
+            make_table_column(99, "missing_id", std::make_shared<DataTypeInt32>()));
+    // MINMAX aggregate pushdown is requested with no predicates or conjuncts.
+    RuntimeState rt_state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&requested_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = requested_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &rt_state,
+                                    .scanner_profile = nullptr,
+                                    .push_down_agg_type = TPushAggOp::type::MINMAX,
+                            })
+                        .ok());
+    ASSERT_TRUE(reader.prepare_split(build_split_options(data_file)).ok());
+    // One row is expected and the requested column must pass the all-NULL check.
+    Block chunk = build_table_block(requested_columns);
+    bool done = false;
+    ASSERT_TRUE(reader.get_block(&chunk, &done).ok());
+    ASSERT_FALSE(done);
+    ASSERT_EQ(chunk.rows(), 1);
+    expect_nullable_column_all_null(*chunk.get_by_position(0).column);
+    // Close the reader before deleting the directory that holds its input file.
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(work_dir);
+}
+
+TEST(TableReaderTest, OpenReaderBuildsTableFiltersFromConjuncts) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_conjunct_filter_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks that conjuncts handed to init() filter the rows returned by get_block().
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 3, "three");
+    // value is projected before id, so id ends up at block position 1.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // First pass: conjunct with literal 2 (presumably `id > 2`); the id = 3 row must survive.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(1, 1, 2))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // open_reader() should convert the table-level conjunct on projected column id 1 into
+    // _table_filters before ColumnMapper creates the FileScanRequest. ColumnMapper then rewrites
+    // the conjunct's slot ref from table column id 1 to the file-local block position used by
+    // ParquetReader. The projection order intentionally puts value before id, so the id filter
+    // column is not at position 0 in the file block.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(id_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 3);
+
+    ASSERT_TRUE(reader.close().ok());
+    // Second pass: same split, literal 4 (presumably `id > 4`); every row must be filtered out.
+    TableReader filtered_reader;
+    ASSERT_TRUE(filtered_reader
+                        .init({
+                                .projected_columns = projected_columns,
+                                .column_predicates = {},
+                                .conjuncts = {prepared_conjunct(
+                                        &state, table_int32_greater_than_expr(1, 1, 4))},
+                                .format = FileFormat::PARQUET,
+                                .scan_params = nullptr,
+                                .io_ctx = nullptr,
+                                .runtime_state = &state,
+                                .scanner_profile = nullptr,
+                        })
+                        .ok());
+    ASSERT_TRUE(filtered_reader.prepare_split(build_split_options(file_path)).ok());
+    // A fully filtered split is expected to report eos with an empty id column.
+    block = build_table_block(projected_columns);
+    eos = false;
+    ASSERT_TRUE(filtered_reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.get_by_position(1).column->size(), 0);
+
+    ASSERT_TRUE(filtered_reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, OpenReaderBuildsColumnPredicateFilters) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_column_predicate_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks that column predicates passed to init() prune the split down to the id = 3 row.
+    const auto file_path = (test_dir / "split.parquet").string();
+    // ColumnPredicate is only used for row-group/statistics pruning. Keep one row per row
+    // group so the predicate can prune the first two row groups and leave only id = 3.
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {1, 5, 8}, {"one", "two", "three"}, 1);
+    // value is projected before id, so id is the projected column at index 1.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(2, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    // GT 2 predicate keyed by GlobalIndex(1) - presumably id's index in projected_columns.
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(1),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(2), false));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // Only the last written row (id 3, "three") is expected to remain.
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 0));
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(id_column.size(), 1);
+    ASSERT_EQ(value_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "three");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ColumnPredicateSurvivesReopenSplit) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_predicate_reopen_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks that one TableReader keeps applying its column predicate across two splits.
+    const std::vector<std::string> file_paths = {
+            (test_dir / "split_1.parquet").string(),
+            (test_dir / "split_2.parquet").string(),
+    };
+    write_int_pair_parquet_file(file_paths[0], {1, 3}, {10, 30}, {"one", "three"}, 1);
+    write_int_pair_parquet_file(file_paths[1], {2, 4}, {20, 40}, {"two", "four"}, 1);
+    // Only id is projected; the predicate below is keyed by GlobalIndex(0).
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+
+    TableColumnPredicates column_predicates;
+    add_column_predicate(&column_predicates, GlobalIndex(0),
+                         create_comparison_predicate<PredicateType::GT>(
+                                 0, "id", make_nullable(std::make_shared<DataTypeInt32>()),
+                                 Field::create_field<TYPE_INT>(2), false));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = std::move(column_predicates),
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // Reuse one reader: prepare_split -> get_block -> close, once per file.
+    std::vector<int32_t> ids;
+    for (const auto& file_path : file_paths) {
+        ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+        Block block = build_table_block(projected_columns);
+        bool eos = false;
+        ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+        ASSERT_FALSE(eos);
+        const auto& id_column =
+                assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+        ASSERT_EQ(id_column.size(), 1);
+        ids.push_back(id_column.get_element(0));
+
+        ASSERT_TRUE(reader.close().ok());
+    }
+    // Each split must contribute exactly one id: 3 from split_1, then 4 from split_2.
+    EXPECT_EQ(ids, std::vector<int32_t>({3, 4}));
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, CreateScanRequestDeduplicatesSharedPredicateColumns) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const std::vector<ColumnDefinition> projected_columns = {
+            make_table_column(0, "a", int_type),
+            make_table_column(1, "b", int_type),
+            make_table_column(2, "c", int_type),
+            make_table_column(3, "value", std::make_shared<DataTypeString>()),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            make_file_column(0, "a", int_type),
+            make_file_column(1, "b", int_type),
+            make_file_column(2, "c", int_type),
+            make_file_column(3, "value", std::make_shared<DataTypeString>()),
+    };
+    // Checks that a column shared by two filters is listed only once in predicate_columns.
+    TableColumnMapper mapper;
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok());
+    // Two filters with global indices {0, 1} and {0, 2}; index 0 appears in both.
+    std::vector<TableFilter> table_filters;
+    table_filters.push_back({
+            // This test only needs the referenced global indices to drive predicate-column
+            // placement. Keep the conjunct empty so the assertion focuses on scan-column
+            // de-duplication rather than expression rewrite/prepare behavior.
+            .conjunct = nullptr,
+            .global_indices = {GlobalIndex(0), GlobalIndex(1)},
+    });
+    table_filters.push_back({
+            .conjunct = nullptr,
+            .global_indices = {GlobalIndex(0), GlobalIndex(2)},
+    });
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request(table_filters, {}, projected_columns, &file_request).ok());
+
+    // Both filters reference column a. It must still be read once as a predicate column, and a
+    // predicate column must not be repeated as a non-predicate column.
+    EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0, 1, 2}));
+    EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector<int32_t>({3}));
+    ASSERT_EQ(file_request.local_positions.size(), 4);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(3)).value(), 0);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(0)).value(), 1);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(1)).value(), 2);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(2)).value(), 3);
+    const auto predicate_column_ids = projection_ids(file_request.predicate_columns);
+    const auto non_predicate_column_ids = projection_ids(file_request.non_predicate_columns);
+    for (const auto predicate_column_id : predicate_column_ids) {
+        EXPECT_TRUE(std::find(non_predicate_column_ids.begin(), non_predicate_column_ids.end(),
+                              predicate_column_id) == non_predicate_column_ids.end());
+    }
+}
+
+TEST(TableReaderTest, CreateScanRequestPromotesProjectedColumnToPredicateColumn) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const std::vector<ColumnDefinition> projected_columns = {
+            make_table_column(0, "id", int_type),
+            make_table_column(1, "score", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            make_file_column(0, "id", int_type),
+            make_file_column(1, "score", int_type),
+    };
+    // Checks that a projected column referenced by a filter is listed as a predicate column.
+    TableColumnMapper mapper;
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok());
+    // Single filter over GlobalIndex(0) - presumably the first projected column (id).
+    TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(table_int32_greater_than_expr(0, 0, 1)),
+            .global_indices = {GlobalIndex(0)},
+    };
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request).ok());
+    // id moves to predicate_columns; score stays non-predicate and takes local position 0.
+    EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0}));
+    EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector<int32_t>({1}));
+    ASSERT_EQ(file_request.local_positions.size(), 2);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(0)).value(), 1);
+    EXPECT_EQ(file_request.local_positions.at(LocalColumnId(1)).value(), 0);
+}
+
+TEST(TableReaderTest, CreateScanRequestUsesColumnNameForByNamePredicateMapping) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    std::vector<ColumnDefinition> projected_columns = {
+            make_table_column(10, "id", int_type),
+            make_table_column(11, "score", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            make_file_column(0, "ID", int_type),
+            make_file_column(1, "score", int_type),
+    };
+    // BY_NAME mode: table ids (10, 11) differ from file ids (0, 1); "id" and "ID" differ in case.
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    set_name_identifiers(&projected_columns);
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok());
+    // Single filter over GlobalIndex(0) - presumably the first projected column (id).
+    TableFilter table_filter {
+            .conjunct = VExprContext::create_shared(table_int32_greater_than_expr(0, 0, 1)),
+            .global_indices = {GlobalIndex(0)},
+    };
+
+    FileScanRequest file_request;
+    ASSERT_TRUE(
+            mapper.create_scan_request({table_filter}, {}, projected_columns, &file_request).ok());
+    // "id" resolves to file column 0; the rewritten slot ref carries slot_id 0 / column_id 1.
+    EXPECT_EQ(projection_ids(file_request.predicate_columns), std::vector<int32_t>({0}));
+    EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector<int32_t>({1}));
+    ASSERT_EQ(file_request.conjuncts.size(), 1);
+    const auto* localized_slot =
+            assert_cast<const VSlotRef*>(file_request.conjuncts[0]->root()->children()[0].get());
+    EXPECT_EQ(localized_slot->slot_id(), 0);
+    EXPECT_EQ(localized_slot->column_id(), 1);
+}
+
+TEST(TableReaderTest, ColumnPredicateFilterUsesColumnNameForByNameMapping) {
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    std::vector<ColumnDefinition> projected_columns = {
+            make_table_column(10, "id", int_type),
+            make_table_column(11, "score", int_type),
+    };
+    const std::vector<ColumnDefinition> file_schema = {
+            make_file_column(0, "ID", int_type),
+            make_file_column(1, "score", int_type),
+    };
+    // BY_NAME mode with table ids (10, 11) that do not match the file ids (0, 1).
+    TableColumnMapper mapper({.mode = TableColumnMappingMode::BY_NAME});
+    set_name_identifiers(&projected_columns);
+    ASSERT_TRUE(mapper.create_mapping(projected_columns, {}, file_schema).ok());
+    // Column predicate keyed by GlobalIndex(0) and built with table column id 10.
+    TableColumnPredicates column_predicates;
+    add_column_predicate(
+            &column_predicates, GlobalIndex(0),
+            create_comparison_predicate<PredicateType::GT>(
+                    10, "id", make_nullable(int_type), Field::create_field<TYPE_INT>(2), false));
+    // No table filters ({}): only column_predicates are handed to the mapper.
+    FileScanRequest file_request;
+    ASSERT_TRUE(mapper.create_scan_request({}, column_predicates, projected_columns, &file_request)
+                        .ok());
+    // The filter must target file column 0, and no column is promoted to predicate_columns.
+    ASSERT_EQ(file_request.column_predicate_filters.size(), 1);
+    EXPECT_EQ(file_request.column_predicate_filters[0].file_column_id.value(), 0);
+    EXPECT_EQ(projection_ids(file_request.non_predicate_columns), std::vector<int32_t>({0, 1}));
+    EXPECT_TRUE(file_request.predicate_columns.empty());
+}
+
+TEST(TableReaderTest, OpenReaderPushesMultiColumnConjunctToParquetReader) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_multi_conjunct_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks that a conjunct spanning two columns (id and score) filters rows end to end.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {1, 5, 8}, {"one", "two", "three"});
+    // Projection order is value, id, score, i.e. block positions 0, 1, 2.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(2, "value", std::make_shared<DataTypeString>()));
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    projected_columns.push_back(make_table_column(1, "score", std::make_shared<DataTypeInt32>()));
+    // Conjunct presumably encodes `id + score > 8` - confirm against the helper's definition.
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(
+            reader.init({
+                                .projected_columns = projected_columns,
+                                .column_predicates = {},
+                                .conjuncts = {prepared_conjunct(
+                                        &state, table_int32_sum_greater_than_expr(1, 1, 2, 2, 8))},
+                                .format = FileFormat::PARQUET,
+                                .scan_params = nullptr,
+                                .io_ctx = nullptr,
+                                .runtime_state = &state,
+                                .scanner_profile = nullptr,
+                        })
+                    .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // The conjunct references both id and score, so ColumnMapper must put both file columns into
+    // predicate_columns and rewrite both slot refs to ParquetReader's file-local block positions.
+    // ParquetReader then evaluates the expression after all predicate columns have been read.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // Only the row (id 3, score 8, "three") is expected to pass the conjunct.
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 0));
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 1));
+    const auto& score_column =
+            assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 2));
+    ASSERT_EQ(id_column.size(), 1);
+    ASSERT_EQ(score_column.size(), 1);
+    ASSERT_EQ(value_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 3);
+    EXPECT_EQ(score_column.get_element(0), 8);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "three");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedColumnsFillDefaultForParquetSchemaMismatch) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_schema_mismatch_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks that projecting a column absent from the file still yields the file's row count.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // The only projected column uses field id 99 and sets no default expression.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(
+            make_table_column(99, "missing_value", std::make_shared<DataTypeString>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // The table projection asks for field id 99, but the ParquetReader exposes only file-local
+    // fields 0 and 1. Missing columns are allowed by the current mapper options, so TableReader
+    // should still use the Parquet row count and fill a default column in table schema.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    EXPECT_EQ(block.get_by_position(0).column->size(), 1);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, DefaultExprResultMatchesNullableTableType) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_nullable_default_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks that an Int32 default expr ends up as a column of the nullable table type.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // Column c_new (field id 99): Nullable(Int32) table type, default expr is Int32 literal 42.
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    auto missing_column = make_table_column(99, "c_new", make_nullable(int_type));
+    missing_column.default_expr = VExprContext::create_shared(
+            VLiteral::create_shared(int_type, Field::create_field<TYPE_INT>(42)));
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(std::move(missing_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+    // The result must be a nullable column whose single row is non-null and equals 42.
+    const auto& result = block.get_by_position(0);
+    ASSERT_TRUE(result.check_type_and_column_match().ok());
+    EXPECT_TRUE(result.type->is_nullable());
+    ASSERT_TRUE(result.column->is_nullable());
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(nullable_column.size(), 1);
+    EXPECT_EQ(nullable_column.get_null_map_data()[0], 0);
+    const auto& values = assert_cast<const ColumnInt32&>(nullable_column.get_nested_column());
+    EXPECT_EQ(values.get_element(0), 42);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, DefaultExprAlignsNestedNullableArrayTableType) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_nested_nullable_array_default_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks the column produced by a default expr for a nested nullable array table type.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // Table type: Nullable(Array(Nullable(Int64))); the default expr helper is defined elsewhere.
+    const auto bigint_type = std::make_shared<DataTypeInt64>();
+    const auto array_type = std::make_shared<DataTypeArray>(make_nullable(bigint_type));
+    const auto table_type = make_nullable(array_type);
+    auto missing_column = make_table_column(99, "single_element_groups", table_type);
+    missing_column.default_expr = VExprContext::create_shared(
+            std::make_shared<NullableArrayBigintDefaultExpr>(table_type));
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(std::move(missing_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+    // Outer level: a nullable column with one row that is not null.
+    const auto& result = block.get_by_position(0);
+    ASSERT_TRUE(result.check_type_and_column_match().ok());
+    ASSERT_TRUE(result.column->is_nullable());
+    const auto& nullable_column = assert_cast<const ColumnNullable&>(*result.column);
+    ASSERT_EQ(nullable_column.size(), 1);
+    EXPECT_EQ(nullable_column.get_null_map_data()[0], 0);
+    // Inner level: a one-element array whose nullable element is non-null and equals 7.
+    const auto& array_column = assert_cast<const ColumnArray&>(nullable_column.get_nested_column());
+    ASSERT_EQ(array_column.size(), 1);
+    EXPECT_EQ(array_column.get_offsets()[0], 1);
+    ASSERT_TRUE(array_column.get_data().is_nullable());
+    const auto& nested_nullable = assert_cast<const ColumnNullable&>(array_column.get_data());
+    ASSERT_EQ(nested_nullable.size(), 1);
+    EXPECT_EQ(nested_nullable.get_null_map_data()[0], 0);
+    const auto& values = assert_cast<const ColumnInt64&>(nested_nullable.get_nested_column());
+    EXPECT_EQ(values.get_element(0), 7);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedColumnsFillMissingParquetColumnWithDefault) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_schema_mismatch_reject_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // NOTE(review): directory name says "reject" but this test expects the read to succeed.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // Checks that a String column with field id 99 and no default_expr reads back as "".
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(
+            make_table_column(99, "missing_value", std::make_shared<DataTypeString>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    const auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string();
+    ASSERT_FALSE(eos);
+
+    const auto& result = block.get_by_position(0);
+    ASSERT_TRUE(result.check_type_and_column_match().ok());
+    // A missing scalar column without an explicit default is materialized as a default-value
+    // column. It may stay constant, so verify through the IColumn interface instead of assuming a
+    // concrete ColumnString instance.
+    ASSERT_EQ(result.column->size(), 1);
+    EXPECT_EQ(result.column->get_data_at(0).to_string(), "");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedStructFillsMissingChildWithDefault) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_struct_missing_child_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks that struct child missing_child (field id 99) comes back as an all-null column.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_struct_parquet_file(file_path, 7);
+    // Projected struct s (field id 100) has children id (field 0) and missing_child (field 99).
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    auto id_child = make_table_column(0, "id", int_type);
+    auto missing_child = make_table_column(99, "missing_child", string_type);
+    auto struct_type = std::make_shared<DataTypeStruct>(DataTypes {int_type, string_type},
+                                                        Strings {"id", "missing_child"});
+    auto struct_column = make_table_column(100, "s", struct_type);
+    struct_column.children = {id_child, missing_child};
+    std::vector<ColumnDefinition> projected_columns = {struct_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // Child 0 must hold id 7; child 1 is checked with expect_nullable_column_all_null.
+    const auto& struct_result =
+            assert_cast<const ColumnStruct&>(expect_not_null_table_column(block, 0));
+    ASSERT_EQ(struct_result.get_columns().size(), 2);
+    const auto& ids = assert_cast<const ColumnInt32&>(
+            expect_not_null_nullable_nested_column(struct_result.get_column(0)));
+    ASSERT_EQ(struct_result.size(), 1);
+    EXPECT_EQ(ids.get_element(0), 7);
+    expect_nullable_column_all_null(struct_result.get_column(1));
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ReusedBlockClearsProjectedStructWithNullableChild) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_struct_nullable_child_reuse_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Checks that passing the same Block to a second get_block() leaves it with zero rows.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_struct_with_nullable_child_parquet_file(file_path);
+    // Struct s: id (field 0), nullable note (field 1), and missing_child (field 99).
+    const auto int_type = std::make_shared<DataTypeInt32>();
+    const auto string_type = std::make_shared<DataTypeString>();
+    const auto nullable_string_type = make_nullable(string_type);
+    auto id_child = make_table_column(0, "id", int_type);
+    auto note_child = make_table_column(1, "note", nullable_string_type);
+    auto missing_child = make_table_column(99, "missing_child", string_type);
+    auto struct_type = std::make_shared<DataTypeStruct>(
+            DataTypes {int_type, nullable_string_type, string_type},
+            Strings {"id", "note", "missing_child"});
+    auto struct_column = make_table_column(100, "s", struct_type);
+    struct_column.children = {id_child, note_child, missing_child};
+    std::vector<ColumnDefinition> projected_columns = {struct_column};
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+    // First read: two rows are expected; note is set in row 0 and null in row 1.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    ASSERT_EQ(block.rows(), 2);
+    const auto& struct_result =
+            assert_cast<const ColumnStruct&>(expect_not_null_table_column(block, 0));
+    const auto& notes = assert_cast<const ColumnNullable&>(struct_result.get_column(1));
+    EXPECT_FALSE(notes.is_null_at(0));
+    EXPECT_TRUE(notes.is_null_at(1));
+    // Second read reuses the already-filled block: expect eos and an empty block.
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.rows(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedPartitionColumnUsesSplitPartitionValue) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_partition_value_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // Project only "value" (id 1) and flag it as a partition key.
+    std::vector<ColumnDefinition> projected_columns;
+    auto partition_column = make_table_column(1, "value", std::make_shared<DataTypeString>());
+    partition_column.is_partition_key = true;
+    projected_columns.push_back(std::move(partition_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // Register "p1" as this split's partition value for column "value".
+    auto split_options = build_split_options(file_path);
+    split_options.partition_values.emplace("value", Field::create_field<TYPE_STRING>("p1"));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    // The file has a physical column with the same id/name. The split partition value should still
+    // take precedence and be materialized by TableReader.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // Expand a possibly-const column before inspecting the nested string data.
+    const auto partition_value = block.get_by_position(0).column->convert_to_full_column_if_const();
+    const auto& partition_value_data = assert_cast<const ColumnString&>(
+            expect_not_null_nullable_nested_column(*partition_value));
+    ASSERT_EQ(partition_value_data.size(), 1);
+    EXPECT_EQ(partition_value_data.get_data_at(0).to_string(), "p1"); // not the file's "one"
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ConstantPartitionFilterSkipsSplitWhenFalse) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_constant_partition_filter_skip_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // Project a single Int32 partition-key column named "part".
+    std::vector<ColumnDefinition> projected_columns;
+    auto partition_column = make_table_column(0, "part", std::make_shared<DataTypeInt32>());
+    partition_column.is_partition_key = true;
+    projected_columns.push_back(std::move(partition_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 10))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // Partition value is 7; the conjunct (literal 10) is presumably `part > 10` - confirm.
+    auto split_options = build_split_options(file_path);
+    split_options.partition_values.emplace("part", Field::create_field<TYPE_INT>(7));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos); // finished on the very first call
+    EXPECT_EQ(block.get_by_position(0).column->size(), 0); // no rows materialized
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ConstantPartitionFilterKeepsSplitWhenTrue) {
+    const auto test_dir = std::filesystem::temp_directory_path() /
+                          "doris_table_reader_constant_partition_filter_keep_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // Project a single Int32 partition-key column named "part".
+    std::vector<ColumnDefinition> projected_columns;
+    auto partition_column = make_table_column(0, "part", std::make_shared<DataTypeInt32>());
+    partition_column.is_partition_key = true;
+    projected_columns.push_back(std::move(partition_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {prepared_conjunct(
+                                            &state, table_int32_greater_than_expr(0, 0, 1))},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // Partition value is 7; the conjunct (literal 1) is presumably `part > 1` - confirm.
+    auto split_options = build_split_options(file_path);
+    split_options.partition_values.emplace("part", Field::create_field<TYPE_INT>(7));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos); // the split is still read
+    // The only output column must carry the split's partition value 7.
+    expect_int32_column_values(*block.get_by_position(0).column, {7});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, RuntimeFilterOnConstantPartitionIsNotPreExecuted) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_constant_runtime_filter";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // Project a single Int32 partition-key column named "part".
+    std::vector<ColumnDefinition> projected_columns;
+    auto partition_column = make_table_column(0, "part", std::make_shared<DataTypeInt32>());
+    partition_column.is_partition_key = true;
+    projected_columns.push_back(std::move(partition_column));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(
+            reader.init({
+                                .projected_columns = projected_columns,
+                                .column_predicates = {},
+                                .conjuncts = {prepared_conjunct(
+                                        &state, runtime_filter_wrapper_expr(
+                                                        table_int32_greater_than_expr(0, 0, 1)))},
+                                .format = FileFormat::PARQUET,
+                                .scan_params = nullptr,
+                                .io_ctx = nullptr,
+                                .runtime_state = &state,
+                                .scanner_profile = nullptr,
+                        })
+                    .ok());
+    // The conjunct is wrapped by runtime_filter_wrapper_expr; the partition value is 7.
+    auto split_options = build_split_options(file_path);
+    split_options.partition_values.emplace("part", Field::create_field<TYPE_INT>(7));
+    ASSERT_TRUE(reader.prepare_split(split_options).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    const auto status = reader.get_block(&block, &eos);
+    ASSERT_TRUE(status.ok()) << status.to_string(); // print the error text on failure
+    ASSERT_FALSE(eos);
+    expect_int32_column_values(*block.get_by_position(0).column, {7});
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ParquetReaderReadsOnlyRowGroupsInFileRange) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_file_range_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+    // Three-row fixture; the trailing 1 is presumably the rows-per-row-group - confirm.
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_int_pair_parquet_file(file_path, {1, 2, 3}, {10, 20, 30},
+                                {"range_group_one", "range_group_two", "range_group_three"}, 1);
+    // Project only "id" (id 0) and "value" (id 2).
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt32>()));
+    projected_columns.push_back(make_table_column(2, "value", std::make_shared<DataTypeString>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+    // The helper name suggests the split range targets row group 1 only - TODO confirm.
+    ASSERT_TRUE(reader.prepare_split(build_split_options_for_row_group_mid(file_path, 1)).ok());
+
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(block.rows(), 1); // exactly one of the three written rows is returned
+    EXPECT_EQ(id_column.get_element(0), 2);
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "range_group_two");
+    // The next call must report EOS with an empty block.
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    EXPECT_TRUE(eos);
+    EXPECT_EQ(block.rows(), 0);
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedColumnsUseMapperExpressionForSameNameDifferentIdParquetSchema) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_same_name_diff_id_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 1, "one");
+    // Request column "id" with table-side id 99.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(99, "id", std::make_shared<DataTypeInt32>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // The table column has the same name as the Parquet field, but a different field id.
+    // ColumnMapper should still resolve it by name and build a SlotRef projection from the file
+    // column into the requested table column.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+
+    const auto& id_column = assert_cast<const ColumnInt32&>(expect_not_null_table_column(block, 0));
+    ASSERT_EQ(id_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 1); // the int passed to write_parquet_file
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+TEST(TableReaderTest, ProjectedColumnsUseMapperExpressionsForParquetSchemaMismatch) {
+    const auto test_dir =
+            std::filesystem::temp_directory_path() / "doris_table_reader_mapper_expr_test";
+    std::filesystem::remove_all(test_dir);
+    std::filesystem::create_directories(test_dir);
+
+    const auto file_path = (test_dir / "split.parquet").string();
+    write_parquet_file(file_path, 7, "seven");
+    // Request "id" as Int64 and "value" as String.
+    std::vector<ColumnDefinition> projected_columns;
+    projected_columns.push_back(make_table_column(0, "id", std::make_shared<DataTypeInt64>()));
+    projected_columns.push_back(make_table_column(1, "value", std::make_shared<DataTypeString>()));
+
+    RuntimeState state {TQueryOptions(), TQueryGlobals()};
+    set_name_identifiers(&projected_columns);
+    TableReader reader;
+    ASSERT_TRUE(reader.init({
+                                    .projected_columns = projected_columns,
+                                    .column_predicates = {},
+                                    .conjuncts = {},
+                                    .format = FileFormat::PARQUET,
+                                    .scan_params = nullptr,
+                                    .io_ctx = nullptr,
+                                    .runtime_state = &state,
+                                    .scanner_profile = nullptr,
+                            })
+                        .ok());
+
+    ASSERT_TRUE(reader.prepare_split(build_split_options(file_path)).ok());
+
+    // The table projection requests id as BIGINT instead of the file INT, so ColumnMapper should
+    // build a Cast expression. The second field has the same type and should build a SlotRef
+    // projection. Both columns should still materialize in table schema order.
+    Block block = build_table_block(projected_columns);
+    bool eos = false;
+    ASSERT_TRUE(reader.get_block(&block, &eos).ok());
+    ASSERT_FALSE(eos);
+    // The output block must keep the projected column names in their requested order.
+    ASSERT_EQ(block.get_by_position(0).name, "id");
+    ASSERT_EQ(block.get_by_position(1).name, "value");
+    const auto& id_column = assert_cast<const ColumnInt64&>(expect_not_null_table_column(block, 0));
+    const auto& value_column =
+            assert_cast<const ColumnString&>(expect_not_null_table_column(block, 1));
+    ASSERT_EQ(id_column.size(), 1);
+    ASSERT_EQ(value_column.size(), 1);
+    EXPECT_EQ(id_column.get_element(0), 7); // read back through the Int64 column
+    EXPECT_EQ(value_column.get_data_at(0).to_string(), "seven");
+
+    ASSERT_TRUE(reader.close().ok());
+    std::filesystem::remove_all(test_dir);
+}
+
+} // namespace
+} // namespace doris::format
diff --git a/docs/doris-iceberg-parquet-api-design.md b/docs/doris-iceberg-parquet-api-design.md
new file mode 100644
index 00000000000000..457550a932da67
--- /dev/null
+++ b/docs/doris-iceberg-parquet-api-design.md
@@ -0,0 +1,511 @@
+# Doris Iceberg + Parquet 新架构 API 设计
+
+本文档用于描述 Doris 中 Iceberg + Parquet 新架构的 API 设计。本文档作为后续从
+`master` 新开重构分支时的起点，只定义 API 形状、职责边界、依赖方向和兼容原则，
+不定义函数实现细节，不提供伪代码，不包含迁移 patch。
+
+## 架构总览
+
+目标架构包含 table 调度层、表格式语义层、schema 映射层、文件通用层和文件格式实现层：
+
+```text
+FileScanner / split producer
+    ->
+TableReader
+    ->
+IcebergTableReader
+    ->
+TableColumnMapper + FileReader
+    ->
+ParquetReader
+```
+
+核心职责如下：
+
+- `TableReader`
+  负责多文件、多 split 的上层调度，统一 scan 生命周期，对外输出 table block，
+  并承接动态分区裁剪等 table-level 通用逻辑。
+- `IcebergTableReader`
+  负责 Iceberg 表语义，包括 schema 绑定、scan task、delete file、虚拟列和 table
+  block finalize。
+- `TableColumnMapper`
+  负责 table schema 到 file schema 的映射，负责 filter localization 和 schema
+  change 映射。
+- `FileReader`
+  负责文件层通用读取接口，只理解 file-local schema 和 file-local scan request。
+- `ParquetReader`
+  作为 `FileReader` 的 Parquet 实现，负责 Parquet 文件物理读取。
+
+依赖方向必须保持单向：
+
+```text
+TableReader
+  -> IcebergTableReader
+    -> TableColumnMapper
+    -> FileReader
+      -> ParquetReader
+```
+
+低层不反向理解高层语义，尤其 `ParquetReader` 不得反向理解 Iceberg/global schema。
+
+## 核心 API 设计
+
+### TableReader
+
+`TableReader` 是最上层读取接口，作为 `IcebergTableReader` 的基类，负责多 split /
+多 file 调度，并承接 table-level 的通用裁剪逻辑，不下沉文件格式语义。
+
+实际 API 文件：
+
+```text
+be/src/format_v2/table_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::format
+```
+
+建议职责：
+
+- 接收 split 列表或 scan task 列表；
+- 控制当前 reader 的创建、切换和关闭；
+- 管理 scan 生命周期；
+- 承接动态分区裁剪等 table-level 通用过滤逻辑；
+- 对外统一输出 table block；
+- `next` 是基类统一入口，内部负责 EOF 后切换 reader；具体表格式只提供打开和读取
+  当前 reader 的 hook。
+
+建议接口形状：
+
+```cpp
+namespace doris::format {
+
+class TableReader {
+public:
+    virtual ~TableReader() = default;
+
+    virtual Status init(const TableReadOptions& options);
+    virtual Status filter(const VExprContextSPtr& expr, bool* can_filter_all);
+    Status next(Block* table_block, size_t* rows, bool* eof);
+    virtual Status close();
+
+protected:
+    Status next_reader();
+    virtual Status open_next_reader(bool* has_reader);
+    virtual Status read_current(Block* table_block, size_t* rows, bool* eof);
+    virtual Status close_current_reader();
+};
+
+} // namespace doris::format
+```
+
+接口约束：
+
+- `TableReader` 输出的是 table block，不输出 file-local block。
+- `TableReader` 负责多文件编排和 table-level 通用裁剪，不负责 schema mapping，不负责
+  Parquet 物理解码。
+- `next_reader` 是 `TableReader` 自己的通用切换逻辑，不作为子类公开 override 接口。
+- 动态分区裁剪这类逻辑应统一收敛到 `TableReader`，而不是散落在具体表格式 reader 中。
+- `TableReader` 不直接依赖旧 `vparquet` 表层语义。
+
+### IcebergTableReader
+
+`IcebergTableReader` 是 Iceberg 表语义层，负责把单个 Iceberg data file 的读取组织成
+table 语义输出。
+
+实际 API 文件：
+
+```text
+be/src/format_v2/table/iceberg_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::iceberg
+```
+
+建议职责：
+
+- 绑定 Iceberg 当前 table schema；
+- 接收 `IcebergScanTask` 列表，并按 `TableReader` 的统一调度打开当前 task；
+- 处理 position delete、equality delete、deletion vector；
+- 物化 `_row_id`、`_last_updated_sequence_number` 等虚拟列；
+- 将 `ParquetReader` 返回的 file-local block finalize 成 table block。
+
+建议接口形状：
+
+```cpp
+namespace doris::iceberg {
+
+class IcebergTableReader : public format::TableReader {
+public:
+    virtual ~IcebergTableReader() = default;
+
+    Status init(IcebergTableReadParams params);
+    Status close() override;
+
+protected:
+    Status open_next_reader(bool* has_reader) override;
+    Status read_current(Block* table_block, size_t* rows, bool* eof) override;
+    Status close_current_reader() override;
+};
+
+} // namespace doris::iceberg
+```
+
+接口约束：
+
+- `IcebergTableReader` 继承 `TableReader`，并通过组合使用 `FileReader`。
+- `IcebergTableReader` 不做 Parquet page/column 解码。
+- `IcebergTableReader` 负责 table-level finalize，不负责 file-local pruning 实现。
+- `IcebergTableReader` 的 schema、scan request、scan tasks 和底层 `FileReader` 应通过
+  一个初始化参数对象一次性传入；除非存在明确生命周期差异，不拆成 `bind` /
+  `init(TableScanRequest)` / `set_scan_tasks` 多阶段接口。
+- `IcebergTableReader` 不重新实现 reader 切换循环，只实现打开 Iceberg task、读取当前
+  task 和关闭当前 reader 的 hook。
+
+### TableColumnMapper
+
+`TableColumnMapper` 是 table schema 到 file schema 的通用映射层，不是
+Iceberg-only 组件。
+
+实际 API 文件：
+
+```text
+be/src/format_v2/table_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::format
+```
+
+建议职责：
+
+- 输入 table schema、file schema、table scan request；
+- 输出 `ColumnMapping` 和通用 `FileScanRequest`；
+- 负责 filter localization；
+- 负责 schema change 映射；
+- 负责复杂列 child mapping；
+- 负责缺失列、default、partition、generated 列的 finalize 语义描述。
+
+建议接口形状：
+
+```cpp
+namespace doris::format {
+
+class TableColumnMapper {
+public:
+    explicit TableColumnMapper(TableColumnMapperOptions options = {});
+
+    virtual Status create_mapping(const std::vector<TableColumnDefinition>& table_schema,
+                                  const std::vector<SchemaField>& file_schema,
+                                  std::vector<ColumnMapping>* mappings);
+
+    virtual Status create_scan_request(const TableScanRequest& table_request,
+                                       const std::vector<ColumnMapping>& mappings,
+                                       FileScanRequest* file_request);
+};
+
+} // namespace doris::format
+```
+
+接口约束：
+
+- `TableColumnMapper` 的输入是 table schema + file schema + table scan request。
+- `TableColumnMapper` 的输出是 `ColumnMapping` + `FileScanRequest`。
+- `TableColumnMapper` 必须是通用层，不做 Iceberg-only 命名。
+- Iceberg 场景默认按 field id 映射；按 name 映射不是本轮默认路径。
+
+### FileReader
+
+`FileReader` 是文件物理读取层的通用接口，为后续 Parquet 之外的文件格式适配预留。
+
+实际 API 文件：
+
+```text
+be/src/format_v2/file_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::format
+```
+
+建议职责：
+
+- 打开物理文件；
+- 暴露 file-local schema；
+- 接收 `FileScanRequest`；
+- 输出 file-local block；
+- 不理解 table/global schema。
+
+建议接口形状：
+
+```cpp
+namespace doris::format {
+
+class FileReader {
+public:
+    virtual ~FileReader() = default;
+
+    virtual Status open(io::FileReaderSPtr file, io::IOContext* io_ctx = nullptr);
+    virtual Status get_schema(std::vector<SchemaField>* file_schema) const;
+    virtual Status init(const FileScanRequest& request);
+    virtual Status next(Block* file_block, size_t* rows, bool* eof);
+    virtual Status close();
+};
+
+} // namespace doris::format
+```
+
+接口约束：
+
+- `FileReader` 输出的是 file-local block，不输出 table/global schema block。
+- `FileReader` 不处理 Iceberg schema evolution、default/generated/partition 列。
+- `IcebergTableReader` 组合 `FileReader`，不直接绑定具体文件格式 reader。
+
+### ParquetReader
+
+`ParquetReader` 是 `FileReader` 的 Parquet 实现，只负责 Parquet file-local schema
+和 Parquet file-local scan request。
+
+实际 API 文件：
+
+```text
+be/src/format/parquet/parquet_reader.h
+```
+
+实际命名空间：
+
+```cpp
+namespace doris::parquet
+```
+
+建议职责：
+
+- 打开 Parquet 文件；
+- 解析 footer 和 file schema；
+- 接收 `ParquetScanRequest` 或通用 `FileScanRequest`；
+- 执行 file-local projection 和 file-local filter；
+- 输出 file-local block。
+
+建议接口形状：
+
+```cpp
+namespace doris::parquet {
+
+class ParquetReader : public format::FileReader {
+public:
+    virtual ~ParquetReader() = default;
+
+    virtual Status open(io::FileReaderSPtr file, io::IOContext* io_ctx = nullptr);
+    virtual Status get_schema(std::vector<format::SchemaField>* file_schema) const;
+    virtual Status init(const ParquetScanRequest& request);
+    virtual Status next(Block* file_block, size_t* rows, bool* eof);
+    virtual Status close();
+};
+
+} // namespace doris::parquet
+```
+
+接口约束：
+
+- `ParquetReader` 输出的是 file-local block，不输出 table/global schema block。
+- `ParquetReader` 不理解 Iceberg schema evolution。
+- `ParquetReader` 不负责 default/generated/partition 列。
+- 任何 table-level cast/default/generated/partition 语义都不能重新塞回
+  `ParquetReader`。
+
+## 关键类型
+
+### SchemaField
+
+`SchemaField` 表示文件层 schema 中的列定义。
+
+建议包含的信息：
+
+- file-local column id；
+- 列名；
+- 类型；
+- child fields。
+
+它服务于 `TableColumnMapper` 做 schema matching，不携带 table-level 语义。
+
+### TableColumnDefinition
+
+`TableColumnDefinition` 表示 table/global schema 中的列定义。
+
+建议包含的信息：
+
+- table column id；
+- 列名；
+- 类型；
+- child columns。
+
+Iceberg 场景下，column id 默认对应 field id。
+
+### TableFilter
+
+`TableFilter` 表示 table 层过滤条件。
+
+建议包含的信息：
+
+- `table_column_id`
+- `conjunct`
+- `predicates`
+
+职责约束：
+
+- `conjunct` 偏表达式过滤，适合表达 cast、复杂表达式、复杂列提取等语义；
+- `predicates` 偏结构化单列下推，适合驱动 row group stats、page index、dictionary、
+  bloom filter 等文件层优化。
+
+### FileLocalFilter
+
+`FileLocalFilter` 表示已经 localize 到 file-local schema 的过滤条件。
+
+建议包含的信息：
+
+- `file_column_id`
+- `conjunct`
+- `predicates`
+
+职责约束：
+
+- `conjunct` 用于 file-local 表达式过滤；
+- `predicates` 用于 file-local 结构化下推；
+- 其输入必须来自 `TableColumnMapper`，不能由具体文件 reader 自己推导 table 语义。
+
+### ColumnMapping
+
+`ColumnMapping` 是 table schema 与 file schema 之间的核心边界对象。
+
+建议包含的信息：
+
+- `table_column_id`
+- `file_column_id`
+- `file_type`
+- `table_type`
+- `finalize_expr`
+- `reader_filter_expr`
+- `child_mappings`
+
+职责约束：
+
+- `finalize_expr` 服务最终输出，把 file-local value 转成 table/global value；
+- `reader_filter_expr` 服务读时 filter fallback；
+- 二者语义不同，不能混用；
+- `child_mappings` 用于复杂列 remap、复杂列裁剪和复杂列 schema change。
+
+### TableScanRequest
+
+`TableScanRequest` 描述 table 层 scan 请求。
+
+建议包含的信息：
+
+- projected table columns；
+- table filters。
+
+它由 `IcebergTableReader` 接收，再交给 `TableColumnMapper` 生成 file-local request。
+
+### ParquetScanRequest
+
+`ParquetScanRequest` 继承 `FileScanRequest`，描述 Parquet file-local scan 请求。
+
+### FileScanRequest
+
+`FileScanRequest` 描述通用 file-local scan 请求。
+
+建议包含的信息：
+
+- projected file columns；
+- local filters；
+- reader expression map。
+
+它是 `FileReader` 的唯一 scan 输入，不包含 table/global schema 语义。
+
+### IcebergScanTask
+
+`IcebergScanTask` 表示一次 Iceberg data file 读取任务。
+
+建议包含的信息：
+
+- data file 信息；
+- position delete 文件；
+- equality delete 文件；
+- deletion vector 信息。
+
+它是 `IcebergTableReader` 的输入，不应直接传给 `ParquetReader`。
+
+### IcebergTableReadParams
+
+`IcebergTableReadParams` 表示一次 Iceberg table scan 的完整初始化输入。
+
+建议包含的信息：
+
+- Iceberg read options；
+- Iceberg table schema；
+- table scan request；
+- Iceberg scan task 列表；
+- 底层 `FileReader`。
+
+它用于避免 `IcebergTableReader` 暴露多个半初始化阶段。调用方应一次性构造完整
+参数并调用 `init`。
+
+## 设计原则
+
+### 边界原则
+
+- `FileReader` 不理解 global schema，不直接处理 Iceberg schema evolution。
+- `ParquetReader` 是 `FileReader` 的 Parquet 实现。
+- `TableColumnMapper` 是 schema mapping 和 filter localization 的唯一入口。
+- `IcebergTableReader` 不做 Parquet 解码，只负责 table-level finalize、delete、
+  virtual columns。
+- `TableReader` 只负责多文件编排和 table-level 通用裁剪，不下沉文件格式语义。
+- 任何 table-level cast/default/generated/partition 语义都不能重新塞回
+  `ParquetReader`。
+
+### 依赖原则
+
+- 低层不能反向依赖高层语义。
+- `FileReader` 只依赖 file-local request。
+- `IcebergTableReader` 继承 `TableReader`，复用其多文件编排和通用裁剪能力。
+- `IcebergTableReader` 通过组合使用 `FileReader`。
+- `TableColumnMapper` 可以被 Iceberg 之外的其他表格式复用。
+
+### 命名原则
+
+- 各层抽象分别使用 `TableReader`、`IcebergTableReader`、`TableColumnMapper`、
+  `FileReader`、`ParquetReader` 命名。
+- `TableColumnMapper` 不使用 Iceberg-only 命名。
+- file schema 类型使用 `SchemaField`，table schema 类型使用 `TableColumnDefinition`。
+
+## 兼容原则
+
+新架构重构期间，新旧代码允许并存，但必须遵守以下约束：
+
+- 旧 `vparquet` / Hive / Hudi / Paimon 路径在新架构稳定前允许保留。
+- 新架构实现不得继续向旧 `vparquet` 表层语义回灌依赖。
+- 先搭新框架 API，再逐步迁移调用点。
+- 不允许边改 API 边混入临时裸逻辑、实验性草稿或未收敛命名。
+- 兼容层可能需要存在，但本文档不定义兼容层的具体实现方案。
+
+## 验收标准
+
+该文档应满足以下目标：
+
+- 不引用错误实验代码作为既成事实；
+- 不出现实现性草稿、裸伪代码、未收敛命名混用；
+- 让另一个工程师从 `master` 新开分支时，可以直接按本文档搭 API 骨架；
+- 读完文档后，不需要再讨论以下问题：
+  - 新架构分几层；
+  - 每层负责什么；
+  - 哪层理解 global schema；
+  - 哪层做 schema change / filter localization / finalize；
+  - 哪层允许依赖旧实现，哪层不允许。
diff --git a/docs/new-parquet-reader-column-index-refactor.md b/docs/new-parquet-reader-column-index-refactor.md
new file mode 100644
index 00000000000000..56f8c7ca4a37d5
--- /dev/null
+++ b/docs/new-parquet-reader-column-index-refactor.md
@@ -0,0 +1,404 @@
+# New Reader 列标识实现说明
+
+本文说明 Doris new table/file reader 栈中各种列标识的当前含义，以及它们在
+`FileScannerV2`、`TableReader`、`TableColumnMapper` 和 new Parquet reader 中的流转逻辑。
+
+核心原则是把 **schema identity** 和 **执行期位置** 分开：
+
+- schema identity 用来判断 table column 和 file column 是否是同一列。
+- index/position 用来表示 block、projection tree、scan request 或 constant map 中的位置。
+- FE column unique id 只在 scanner 边界用于定位 slot，进入 table/file reader 后不再出现。
+
+共享定义集中在 `be/src/format_v2/column_data.h`。file reader 通用请求定义在
+`be/src/format_v2/file_reader.h`。new Parquet reader 自己的 Parquet 内部 schema tree 定义在
+`be/src/format_v2/parquet/parquet_column_schema.h`。
+
+## 层级边界
+
+当前 reader 栈可以按语义分成三层。
+
+### FileScannerV2：FE 标识到 reader 标识的边界
+
+`FileScannerV2` 仍能看到 FE 下发的 `slot_id`、`col_unique_id`、`TFileScanSlotInfo` 和
+`TColumnAccessPath`。这些 FE 侧标识只在这里使用。
+
+`FileScannerV2::_build_projected_columns()` 会把 `_params->required_slots` 转成
+`std::vector<format::ColumnDefinition>`：
+
+- vector 下标就是 `GlobalIndex`。
+- `_slot_id_to_global_index` 把 FE `slot_id` 转成 `GlobalIndex`，用于 row-level conjunct。
+- `_column_unique_id_to_global_index` 把 FE `col_unique_id` 转成 `GlobalIndex`，用于 column predicate。
+- `ColumnDefinition::identifier` 表示 table-side schema identity，默认是列名；如果外部 schema
+  提供 field id，则改用 field id。
+- partition/default/generated 信息被挂到 `ColumnDefinition` 上，由 table reader 层处理。
+
+从这一层往下，table/file reader 不再使用 FE column unique id。
+
+### TableReader / TableColumnMapper：table schema 到 file schema
+
+`TableReader::open_reader()` 对每个 split 打开一个具体 `FileReader`，先通过
+`FileReader::get_schema()` 获取当前文件的 file-local schema，再用 `TableColumnMapper` 建立映射。
+
+`TableColumnMapper` 的输入是：
+
+- table/global schema：`FileScannerV2` 构造的 `projected_columns`。
+- file-local schema：具体 file reader 返回的 `std::vector<ColumnDefinition>`。
+- per-split partition values。
+- table-level row filters 和 column predicates。
+
+`TableColumnMapper` 的输出是：
+
+- `ColumnMapping`：构造阶段使用的 table column 到 file/constant/virtual source 的映射。
+- `FileScanRequest`：只含 file-local projection、file-local block layout 和 file-local filters。
+- `ColumnMapResult` / `ResultColumnMapping`：给 table reader finalize 阶段消费的最终映射。
+- `FilterEntry`：给 filter localization 使用的 `GlobalIndex -> LOCAL/CONSTANT/UNSET` target。
+- `ConstantMap`：partition/default/generated 常量列。
+
+### FileReader / ParquetReader：只理解 file-local 请求
+
+`FileReader` 只暴露两类 schema/request：
+
+- `get_schema(std::vector<ColumnDefinition>*)`：返回文件自身 schema。
+- `open(std::unique_ptr<FileScanRequest>&)`：接收已经 localize 后的 file-local scan request。
+
+具体 file reader 不理解 table/global schema、Iceberg default、partition column、FE slot id 或
+FE column unique id。
+
+new Parquet reader 使用 `FileScanRequest` 中的 `LocalColumnIndex` 创建 column reader，并使用
+`local_positions` 决定 file-local block layout。
+
+## ColumnDefinition
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`ColumnDefinition` 是 table/global schema 和 file-local schema 共用的列定义。它表示列名、类型、
+nested children、默认表达式、partition 属性和 file-local column kind。
+
+关键字段：
+
+- `identifier`：schema identity。用于 table column 和 file column 匹配。
+- `local_id`：file reader 返回的 schema node 在当前 parent 下的 reader-local id。
+- `name`：逻辑列名。BY_NAME 且没有显式 string identifier 时会回退到它。
+- `type`：当前 schema node 的 Doris 类型。
+- `children`：nested children。table/global schema 中是 table children；file schema 中是
+  file-local children。
+- `default_expr`：missing/default/generated column 的物化表达式。
+- `is_partition_key`：partition column 标记。
+- `column_type`：file-local column kind，例如普通数据列或 row number virtual column。
+
+`ColumnDefinition` 不保存 FE column unique id。它也不保存“应该按什么方式匹配”。匹配方式由
+`TableColumnMapperOptions::mode` 统一决定。
+
+### identifier
+
+`identifier` 是一个 `Field`，语义接近 DuckDB `MultiFileColumnDefinition::identifier`：
+
+- `TYPE_NULL`：没有显式 identifier。BY_NAME 时使用 `name`。
+- `TYPE_INT`：在 BY_FIELD_ID 中表示 field id；在 BY_INDEX 中表示 file schema position。
+- `TYPE_STRING`：显式 name identifier。
+
+访问 helper：
+
+- `has_identifier_field_id()` / `get_identifier_field_id()`：BY_FIELD_ID 使用。
+- `get_identifier_name()`：BY_NAME 使用；没有显式 string identifier 时返回 `name`。
+- `get_identifier_position()`：BY_INDEX 使用。
+- `file_local_id()`：file reader projection 使用；优先返回 `local_id`，否则回退到 int
+  identifier。这个回退只用于兼容某些 file schema 构造路径，不应重新引入 FE id 语义。
+
+## 强类型位置
+
+### GlobalIndex
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`GlobalIndex` 表示 table/global output block 中的 top-level 列位置。当前等于
+`_params->required_slots` 的下标。
+
+主要使用位置：
+
+- `ColumnMapping::global_index`
+- `TableFilter::global_indices`
+- `TableColumnPredicates` 的 key
+- `ColumnMapResult` / `ResultColumnMapping` 的 key
+- `FilterEntry` map 的 key
+
+`GlobalIndex` 不是 FE slot id，也不是 FE column unique id。
+
+### LocalColumnId
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`LocalColumnId` 表示当前物理文件 schema 的 top-level reader-local column id。
+
+主要使用位置：
+
+- `FileScanRequest::local_positions` 的 key。
+- `LocalColumnIndex::top_level()`。
+- new Parquet reader 创建 top-level column reader。
+- page index、statistics、bloom filter 等 file-local pruning 的 root column key。
+- row position 这类 reader 内部 virtual column id。
+
+`LocalColumnId` 不是 file-local block position。一个 top-level file column 在本次 scan request
+输出 block 中的位置由 `LocalIndex` 表示。
+
+### LocalIndex
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`LocalIndex` 表示一次 `FileScanRequest` 内 file-local block 的列位置。
+
+主要使用位置：
+
+- `FileScanRequest::local_positions` 的 value。
+- file-local rewritten `SlotRef` 的 input position。
+- `TableReader` 从 file block 取列。
+- `ParquetScanScheduler` 把 column reader 读出的数据写入 file block。
+
+`LocalIndex` 是 request-local block layout，不是 file schema ordinal。
+
+### ConstantIndex
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`ConstantIndex` 表示 `ConstantMap` 中的 entry 位置。它用于 per-split/per-file 常量列：
+
+- partition column。
+- schema evolution default column。
+- generated/default expression column。
+- 将来可扩展到更多 virtual/constant source。
+
+`FilterEntry` 可以指向 `ConstantIndex`。当一个 row-level conjunct 只引用 constant target 时，
+`TableReader` 会在打开 file reader 前用 1 行常量 block 求值；如果结果为 false/NULL，当前 split
+直接跳过。
+
+### LocalColumnIndex
+
+定义位置：`be/src/format_v2/column_data.h`
+
+`LocalColumnIndex` 表示递归 file-local projection path：
+
+```cpp
+struct LocalColumnIndex {
+    int32_t index = -1;
+    bool project_all_children = true;
+    std::vector<LocalColumnIndex> children;
+};
+```
+
+语义：
+
+- root entry 的 `index` 是 `LocalColumnId`。
+- nested entry 的 `index` 是当前 parent 下的 file-local child id。
+- `project_all_children = true` 表示读取整个 subtree。
+- `project_all_children = false` 表示只读取 `children` 中列出的 child paths。
+
+通用 helper：
+
+- `is_full_projection()`
+- `is_partial_projection()`
+- `find_child_projection()`
+- `is_child_projected()`
+- `merge_local_column_index()`
+
+new Parquet reader 的 STRUCT/LIST/MAP reader 都消费这套 projection helper：
+
+- STRUCT：只创建被投影 child 的 reader。
+- LIST：把 element projection 递归传给 element reader。
+- MAP：总是读取 key，把 value projection 递归传给 value reader。
+
+## FileScanRequest
+
+定义位置：`be/src/format_v2/file_reader.h`
+
+`FileScanRequest` 是 table reader 交给 file reader 的唯一 scan 输入。它不包含 table/global schema。
+
+关键字段：
+
+- `predicate_columns`：row-level conjunct/delete conjunct 需要先读取的 file-local projection。
+- `non_predicate_columns`：最终输出需要读取、且不需要先参与 row-level filter 的 file-local
+  projection。
+- `local_positions`：`LocalColumnId -> LocalIndex`，决定 file-local block layout。
+- `conjuncts` / `delete_conjuncts`：已经把 table/global slot 改写成 file-local slot 的表达式。
+- `column_predicate_filters`：file-layer pruning hints，只用于 min/max、page index、dictionary、
+  bloom filter 等剪枝，不参与 batch row filtering。
+
+`predicate_columns` 和 `non_predicate_columns` 都按 file-local schema 表达。file reader 只需要根据
+这两个列表创建 reader，并按 `local_positions` 写入 file block。
+
+## TableColumnMapper 逻辑
+
+定义位置：
+
+- `be/src/format_v2/column_mapper.h`
+- `be/src/format_v2/column_mapper.cpp`
+
+### 匹配模式
+
+`TableColumnMapperOptions::mode` 决定 `identifier` 的解释方式：
+
+- `BY_FIELD_ID`：`TYPE_INT` identifier 是 field id。
+- `BY_NAME`：`TYPE_STRING` identifier 或 `name` 是匹配名。
+- `BY_INDEX`：`TYPE_INT` identifier 是 file schema position。
+
+`TableReader::open_reader()` 当前默认按 field id 映射；如果 file schema 首列没有 int identifier，
+会 fallback 到 BY_NAME。Hive reader 可覆盖默认模式，Hive1 ORC 这类场景可使用 BY_INDEX。
+
+### create_mapping()
+
+`create_mapping()` 为每个 `GlobalIndex` 生成一个 `ColumnMapping`：
+
+1. partition column 优先映射到 `ConstantMap`。
+2. BY_INDEX 时按 file position 取 file schema。
+3. 普通列通过 matcher 在 file schema 中找对应 file field。
+4. 缺失但带 default expr 的列映射到 `ConstantMap`。
+5. 特殊 virtual column 记录 virtual column type。
+6. 允许 missing column 时保留空 mapping，由 table finalize 阶段补 NULL/default。
+
+`ColumnMapping::file_local_id` 是 table column 绑定到 file schema 后的 reader-local id：
+
+- root mapping 中可转成 `LocalColumnId`。
+- nested mapping 中表示 parent 下的 child id。
+- constant/missing/virtual mapping 没有 `file_local_id`。
+
+schema identity field id 不保存在 `ColumnMapping` 中，只保存在
+`ColumnDefinition::identifier` 中，并由 mapper 的匹配模式解释。
+
+### create_scan_request()
+
+`create_scan_request()` 把 table-level scan 信息转换成 file-local request：
+
+1. 先把不参与 row-level filter 的输出列加入 `non_predicate_columns`。
+2. 调用 `localize_filters()`，把 row-level conjunct 和 column predicates 定位到 file-local source。
+3. 为所有已读取 file column 重建 output projection，让 `ColumnMapping::projection` 指向正确的
+   `LocalIndex`。
+4. 生成 `ColumnMapResult` 和 `ResultColumnMapping`，供 table reader finalize。
+
+`local_positions` 在这个阶段确定。同一个 file column 如果同时被 filter 和 output 使用，只会有
+一个 `LocalIndex`。
+
+### FilterEntry
+
+`FilterEntry` 是 `GlobalIndex` 到 filter target 的映射结果：
+
+- `LOCAL`：filter 可以在 file-local block 上求值，target 是 `LocalIndex`。
+- `CONSTANT`：filter 只依赖 `ConstantMap` entry。
+- `UNSET`：该 filter 在当前 split 上无法下推到 file reader。
+
+`TableColumnMapper::_build_filter_entries()` 在 `FileScanRequest::local_positions` 确定后生成
+`FilterEntry`。表达式改写时只把 `LOCAL` target 改写成 file-local slot；`CONSTANT` target 用于
+split-level constant filter evaluation。
+
+### ColumnMapResult / ResultColumnMapping
+
+`ColumnMapResult` 记录一个 global result column 的递归映射结果：
+
+- `local_column_id`：root file column。
+- `column_index`：file-local projection tree。
+- `mapping`：root 指向 `LocalIndex`，nested child 通过 `IndexMapping::child_mapping` 递归映射。
+
+`ResultColumnMapping` 是最终可消费的 `GlobalIndex -> ColumnMapEntry` map。`ColumnMapEntry` 包含：
+
+- `IndexMapping mapping`
+- `local_type`
+- `global_type`
+- `filter_conversion`
+
+TableReader finalize 阶段用它把 file-local block 转成 table/global block。
+
+### nested child mapping
+
+复杂列映射时，`IndexMapping::child_mapping` 的 key 是 table/global child ordinal，value 是对应
+file-local child mapping。这样 filter 中的 `STRUCT_EXTRACT` 可以按 table child ordinal 找到
+file child ordinal。
+
+Doris 不再维护额外的 `NestedPredicateTargetInfo` / filter target path。nested filter localization
+直接沿 `IndexMapping::child_mapping` 转换 selector path。
+
+对于 `SELECT s.name WHERE s.id > 5` 这类 filter-only child：
+
+- `s.name` 进入 output projection。
+- `s.id` 会进入 predicate projection。
+- `original_file_children` 保留 projection 前的 file children，用于定位 filter-only child。
+- `child_mappings` 只描述输出 shape，避免 filter-only child 改变最终 STRUCT/LIST/MAP shape。
+
+## Parquet 内部 schema 标识
+
+定义位置：`be/src/format_v2/parquet/parquet_column_schema.h`
+
+`ParquetColumnSchema` 是 new Parquet reader 内部 schema tree。它描述 Parquet 逻辑字段和 primitive
+leaf column 的关系，不暴露给 table reader。对外统一通过 `ParquetReader::get_schema()` 返回
+`std::vector<format::ColumnDefinition>`。
+
+关键字段：
+
+- `local_id`：当前 parent 下的 reader-local id。top-level 是 root field ordinal，nested 是 child
+  ordinal。`LocalColumnIndex` 传给 `ParquetColumnReaderFactory` 的就是这个 id。
+- `parquet_field_id`：Parquet schema element 中可选的 field_id。Arrow 在不存在 field_id 时返回
+  `-1`。它只作为 schema matching identifier，不用于读取 column chunk。
+- `name`：Parquet schema name。
+- `type`：转换后的 Doris 类型。
+- `leaf_column_id`：Parquet primitive leaf column ordinal。用于访问 `ColumnDescriptor`、
+  row group column chunk、statistics、page index、bloom filter 等。复杂节点为 `-1`。
+- `type_descriptor`：primitive leaf 的 Parquet physical/logical type 信息。
+- `descriptor`：primitive leaf 的 Arrow Parquet `ColumnDescriptor`。
+- `max_definition_level` / `max_repetition_level`：该 node 下的最大 Dremel level。
+- `nullable_definition_level`：当前 node 自身为 NULL 时对应的 definition level。
+- `repeated_repetition_level`：当前或最近 repeated container 的 repetition level。
+
+`ParquetReader::get_schema()` 会把 `ParquetColumnSchema` 转成 `ColumnDefinition`：
+
+- 如果 `parquet_field_id >= 0`，`ColumnDefinition::identifier` 是 `TYPE_INT` field id。
+- 否则 `identifier` 是 `TYPE_STRING` name。
+- `ColumnDefinition::local_id` 是 `ParquetColumnSchema::local_id`。
+- children 递归转换。
+
+因此 table reader 可以按 field id 或 name 匹配，而 Parquet reader 自己仍只按 `local_id`、
+`leaf_column_id` 和 Dremel levels 读取数据。
+
+## 端到端流转
+
+一次 split 的列标识流转如下：
+
+1. `FileScannerV2::_build_projected_columns()`：
+   FE `slot_id` / `col_unique_id` 被翻译成 `GlobalIndex`，并生成 table-side
+   `ColumnDefinition`。
+2. `ParquetReader::init()`：
+   解析 Arrow Parquet schema，构造内部 `ParquetColumnSchema`。
+3. `ParquetReader::get_schema()`：
+   把 Parquet 内部 schema 暴露成 file-side `ColumnDefinition`。
+4. `TableReader::open_reader()`：
+   根据 file schema 是否带 int identifier 选择 BY_FIELD_ID 或 BY_NAME，并调用 mapper。
+5. `TableColumnMapper::create_mapping()`：
+   用 `ColumnDefinition::identifier` 匹配 table/global schema 和 file-local schema，生成
+   `ColumnMapping`。
+6. `TableColumnMapper::create_scan_request()`：
+   生成 `FileScanRequest`，其中所有 projection 和 block position 都是 file-local 的。
+7. `ParquetReader::open()`：
+   校验 `LocalColumnId`，用 `LocalColumnIndex` 创建 column readers，并规划 row group pruning。
+8. `ParquetScanScheduler`：
+   按 `local_positions` 把 predicate/non-predicate column 写入 file-local block。
+9. `TableReader` finalize：
+   使用 `ResultColumnMapping`、`ConstantMap` 和 projection expression，把 file-local block 转成
+   table/global output block。
+
+## 使用约定
+
+修改 new reader 代码时应遵守以下约定：
+
+- 不要在 table/file reader 层重新传递 FE column unique id。
+- 不要把 `ColumnDefinition::identifier` 当作 file reader 读取 id。
+- 不要把 `LocalColumnId` 当作 block position；block position 使用 `LocalIndex`。
+- 不要把 `LocalIndex` 当作 schema ordinal。
+- `LocalColumnIndex::index` 在 root 和 child 层含义不同，调用方必须知道当前 projection node
+  所在层级。
+- file reader 只能消费 `FileScanRequest`，不能理解 partition/default/generated/table schema。
+- column predicate pruning 是 file-layer hint，不等价于 row-level filter。
+- constant filter 可以在 table reader 层提前求值，但不应下推到 file reader。
+
+## 已知限制
+
+TVF 查询 Parquet 且文件没有 field id 时，top-level BY_NAME 已经可以通过 name identifier 工作。
+但 nested access path 的 fallback 目前仍有一处 TODO：STRUCT child fallback 使用 struct ordinal
+构造 int identifier。对于没有 field id 的 nested Parquet schema，BY_NAME 场景应保留 string
+identifier，让 `TableColumnMapper` 从 Parquet file schema 中按 name 解析 file-local child id。
+该问题已在 `be/src/exec/scan/file_scanner_v2.cpp` 代码中记录，当前未修复。
diff --git a/docs/new-parquet-reader-ut-improvement-plan.md b/docs/new-parquet-reader-ut-improvement-plan.md
new file mode 100644
index 00000000000000..4ece111d0d6323
--- /dev/null
+++ b/docs/new-parquet-reader-ut-improvement-plan.md
@@ -0,0 +1,325 @@
+# New Parquet Reader UT Improvement Plan
+
+本文档评估 Doris new parquet reader 当前 UT 覆盖方式，并给出更合理的测试分层、数据构造方法和落地优先级。
+
+目标不是追求形式上的 100% 行覆盖率，而是让测试能够发现 new parquet reader 最容易出错的真实问题：schema 兼容、definition/repetition level 物化、投影/过滤交互、row group/page pruning、delete predicate 以及 schema evolution 组合。
+
+## 当前覆盖方式评估
+
+当前测试分层大体合理：
+
+| 层级 | 代表文件 | 当前价值 |
+|---|---|---|
+| Schema resolver UT | `be/test/format_v2/parquet/parquet_schema_test.cpp` | 直接构造 Parquet schema node，验证 `ParquetColumnSchema` 的 kind、type、level 和非法 schema 拒绝。速度快，适合覆盖 schema 分支。 |
+| Type resolver UT | `be/test/format_v2/parquet/parquet_type_test.cpp` | 覆盖 physical/logical/converted type 到 Doris type 的映射。 |
+| Leaf value UT | `be/test/format_v2/parquet/parquet_leaf_reader_test.cpp` | 覆盖 nullable spacing、binary/fixed/bool/float16 等 leaf append 细节。 |
+| Column reader UT | `be/test/format_v2/parquet/parquet_column_reader_test.cpp` | 用 Arrow writer 生成真实 parquet 文件，覆盖 scalar/struct/list/map 的 read、skip、select、overflow。 |
+| File reader UT | `be/test/format_v2/parquet/parquet_reader_test.cpp` | 覆盖 open/read、多 row group、predicate selection、statistics/dictionary/page index pruning、row position、delete predicate。 |
+| Table reader UT | `be/test/format_v2/table_reader_test.cpp` | 覆盖 table schema 到 file schema mapping、aggregate pushdown、default value、Iceberg delete/virtual column 等跨层行为。 |
+
+这个方向是正确的，但目前有三个明显缺口：
+
+1. Schema 兼容测试和真实读取测试之间缺少桥接。`parquet_schema_test.cpp` 可以证明 legacy LIST/MAP schema 被解析成期望的 tree，但不能证明 `ListColumnReader`、`MapColumnReader` 可以正确消费对应 def/rep levels。
+2. 真实 parquet 文件主要由 Arrow writer 生成。Arrow 生成的文件通常符合标准 layout，不能充分代表 Hive、Spark、old parquet-mr、旧 Doris 或其它 legacy writer 的 schema 形态。
+3. 异常路径和组合路径覆盖不足。比如 optional map key 被 schema 接受后，真实数据中 key 为 null 必须在 materialize 阶段报错；key/value stream 不对齐、invalid repeated level、non-nullable complex column 读到 null 等 corruption 路径需要专门测试。
+
+## 改进原则
+
+1. 按风险分层测试，不用单一大 fixture 覆盖所有逻辑。
+2. Schema resolver 只验证 schema 归一化，不承担真实读取正确性的证明。
+3. Def/rep level materialization 要有直接单测，避免所有边界都依赖真实 parquet 文件构造。
+4. 对 legacy layout 使用 golden parquet corpus，而不是只用 Arrow writer 动态生成。
+5. Reader 集成测试覆盖跨模块行为，避免在 SQL regression 中验证过多 BE 内部细节。
+6. SQL regression 只保留用户可见和跨层最关键路径，避免回归测试过慢。
+
+## 推荐测试分层
+
+### L0: Schema Resolver Table-Driven UT
+
+位置：`be/test/format_v2/parquet/parquet_schema_test.cpp`
+
+职责：覆盖 `parquet_column_schema.cpp` 的 schema 归一化规则。建议把 LIST/MAP case 整理成 table-driven 形式，每个 case 明确：
+
+- 输入 schema layout
+- 是否成功
+- top-level kind/type/nullability
+- child kind/name/type/nullability
+- definition/repetition level
+- error message 关键字
+
+必须覆盖的 schema 形态：
+
+| 类别 | Case |
+|---|---|
+| LIST 标准格式 | Standard 3-level list: `optional group a (LIST) { repeated group list { optional int32 element; } }` |
+| LIST legacy | repeated primitive, repeated group named `array`, repeated group named `<list_name>_tuple`, repeated group with multiple children |
+| LIST wrapper 判定 | repeated group with logical annotation, repeated group whose only child is repeated, repeated group whose only child is optional scalar |
+| Bare repeated | repeated primitive field, repeated group field inside struct |
+| MAP 标准格式 | required/optional outer map, required/optional value |
+| MAP 兼容格式 | optional key accepted at schema level, `MAP_KEY_VALUE` converted annotation |
+| Invalid schema | LIST outer has zero/multiple children, non-repeated LIST child, MAP outer has zero/multiple children, primitive MAP entry, non-repeated MAP entry, entry child count not equal to 2, repeated outer LIST/MAP in normal mode |
+| Unsupported type | UTC TIME rejection, unsupported physical/logical type |
+
+L0 的验收标准：schema branch 新增或修改时，必须有对应 table-driven case；但 L0 通过不代表 reader 行为充分。
+
+### L1: Def/Rep Level Materializer UT
+
+位置建议：
+
+- `be/test/format_v2/parquet/parquet_nested_materializer_test.cpp`
+- 或拆分为 `parquet_list_column_reader_test.cpp`、`parquet_map_column_reader_test.cpp`
+
+职责：用 fake child reader 直接喂 definition levels、repetition levels 和 leaf values，验证 `ListColumnReader` / `MapColumnReader` 的 offsets、nullmap、child values、cursor 和错误路径。
+
+这种方式比构造真实 parquet 文件更适合覆盖边界，因为 def/rep level 是复杂类型 reader 的核心输入。
+
+建议增加测试工具：
+
+```cpp
+class FakeNestedColumnReader final : public ParquetColumnReader {
+public:
+    Status load_nested_batch(int64_t rows) override;
+    Status build_nested_column(int64_t length_upper_bound, MutableColumnPtr& column,
+                               int64_t* values_read) override;
+    const std::vector<int16_t>& nested_definition_levels() const override;
+    const std::vector<int16_t>& nested_repetition_levels() const override;
+    int64_t nested_levels_written() const override;
+};
+```
+
+必须覆盖的 materialize case：
+
+| 类别 | Case |
+|---|---|
+| LIST 正常路径 | null list, empty list, list with values, list with null element, consecutive repeated elements |
+| LIST 操作 | read 分批、skip 后 read、select 非连续行、select 跨 overflow 边界 |
+| LIST 异常 | first level has `rep_level == list.repetition_level`, non-nullable LIST 读到 null, child value count 不匹配 |
+| MAP 正常路径 | null map, empty map, one entry, multiple entries, nullable value, complex value |
+| MAP 操作 | read 分批、skip 后 read、select 非连续行、value scalar path 和 complex value path |
+| MAP 异常 | null key, value stream ended before key stream, key/value repetition level 不对齐, key count 不匹配, value count 不匹配, non-nullable MAP 读到 null |
+
+L1 的验收标准：`ListColumnReader::build_nested_column()` 和 `MapColumnReader::build_nested_column()` 的主要分支必须有直接 UT；corruption path 不能只靠真实文件偶然触发。
+
+### L2: Golden Parquet Corpus UT
+
+位置建议：
+
+- 数据文件：`be/test/exec/test_data/parquet_v2_compat/`
+- 测试文件：`be/test/format_v2/parquet/parquet_compat_corpus_test.cpp`
+
+职责：保存小型真实 parquet 文件，覆盖非 Arrow 标准 writer 或难以用 Arrow writer 生成的 legacy layout。每个文件的数据行数控制在几十行以内，配套记录 schema 来源和 expected output。
+
+建议文件来源：
+
+| 来源 | 覆盖目标 |
+|---|---|
+| Arrow writer | 标准 LIST/MAP、page v2、dictionary/plain、不同 row group/page size |
+| Spark | Spark nested list/map schema、nullable struct/list/map 混合 |
+| Hive/parquet-mr | legacy two-level list、optional map key、`array` / `bag` / `key_value` 等命名兼容 |
+| 手工生成 | malformed-but-parseable def/rep level edge case，或特殊 converted annotation |
+
+Golden 文件命名建议：
+
+```text
+be/test/exec/test_data/parquet_v2_compat/
+  list_two_level_repeated_primitive.parquet
+  list_tuple_struct_element.parquet
+  list_repeated_group_with_logical_map_element.parquet
+  map_optional_key_no_null.parquet
+  map_optional_key_with_null.parquet
+  map_value_list_nullable.parquet
+  nested_list_struct_map_list.parquet
+  README.md
+```
+
+每个 corpus case 至少验证：
+
+- `get_schema()` 输出是否符合预期
+- full read 输出是否符合预期
+- projection read 输出是否符合预期
+- skip/select 后输出是否符合预期
+- 预期失败文件是否返回明确错误
+
+L2 的验收标准：每一个 schema compatibility rule 至少有一个真实 parquet 文件证明 reader 可以消费该 layout。
+
+### L3: New Parquet Reader Integration UT
+
+位置：`be/test/format_v2/parquet/parquet_reader_test.cpp`
+
+职责：覆盖 file reader 层的组合行为，不重复 L1 的低层 def/rep 细节。
+
+建议补充或保留以下组合：
+
+| 类别 | Case |
+|---|---|
+| Projection + predicate | `SELECT s.b WHERE s.a > x` 对应 file-local projection 与 predicate projection 合并 |
+| Complex non-predicate select | predicate 过滤后，非谓词复杂列通过 selection vector 读取 |
+| Row group/page pruning + complex projection | page index 缩小 row ranges 后，list/map/struct 输出行数和 offsets 正确 |
+| Dictionary/statistics pruning | nested scalar leaf predicate 可 prune，但 repeated leaf 不做错误 aggregate/pruning |
+| Delete predicate | delete predicate 和 query predicate 同时作用时 row position、selection、输出列一致 |
+| Timestamp TZ | timestamp tz mapping 后 schema、read、min/max pushdown 一致 |
+| Reopen split | 同一个 reader reopen 不残留 selection、cast、predicate projection、page skip state |
+
+L3 的验收标准：跨 reader state 的行为必须有 UT，尤其是 reopen、filter 后 selection、page skip 后 output column 不 double skip。
+
+### L4: Table Reader And SQL Regression
+
+位置：
+
+- `be/test/format_v2/table_reader_test.cpp`
+- `regression-test/suites/external_table_p*_parquet/` 或现有 parquet 外表相关目录
+
+职责：覆盖用户可见行为和 FE/BE 接口组合，不在 regression 中验证 BE 内部 offset/nullmap 细节。
+
+建议保留少量高价值 SQL regression：
+
+| 场景 | SQL 覆盖 |
+|---|---|
+| Legacy LIST/MAP 文件可读 | `SELECT *`, `SELECT nested_child`, `WHERE nested_child predicate` |
+| Schema evolution | missing nested child with default, reordered/renamed nested field |
+| Predicate pushdown 正确性 | row group/page pruning 开关开启时结果与关闭时一致 |
+| Aggregate pushdown 正确性 | `count`, `min`, `max` 对 flat leaf 和 supported nested single leaf 正确；repeated leaf fallback |
+| Iceberg/Paimon delete | delete vector / position delete / equality delete 与 parquet reader 组合结果正确 |
+
+L4 的验收标准：新增用户可见兼容能力时必须有 SQL regression；纯内部 refactor 不强制补 SQL regression，但需要 L0-L3 覆盖。
+
+## 覆盖矩阵
+
+下面的矩阵用于判断新改动应该补哪一层测试。
+
+| 逻辑区域 | L0 Schema | L1 Def/Rep | L2 Corpus | L3 Reader | L4 SQL |
+|---|---:|---:|---:|---:|---:|
+| Parquet type mapping | 必须 | 不需要 | 可选 | 可选 | 可选 |
+| LIST/MAP schema compatibility | 必须 | 可选 | 必须 | 可选 | 必须覆盖用户可见新增能力 |
+| Bare repeated field | 必须 | 必须 | 必须 | 可选 | 可选 |
+| List offsets/nullmap | 仅 L0 不足 | 必须 | 必须 | 必须 | 可选 |
+| Map offsets/nullmap/key validation | 仅 L0 不足 | 必须 | 必须 | 必须 | 可选 |
+| Projection pruning | 可选 | 可选 | 必须 | 必须 | 必须覆盖用户可见路径 |
+| Predicate selection | 不需要 | 可选 | 可选 | 必须 | 必须覆盖关键路径 |
+| Statistics/dictionary/page pruning | 不需要 | 不需要 | 可选 | 必须 | 结果一致性必须 |
+| Aggregate pushdown | 不需要 | 不需要 | 可选 | 必须 | 必须 |
+| Delete predicate / row position | 不需要 | 不需要 | 可选 | 必须 | Iceberg/Paimon 必须 |
+| Error/corruption path | 必须覆盖 schema error | 必须覆盖 materialize error | 必须覆盖真实坏文件 | 可选 | 可选 |
+
+## 推荐优先级
+
+### P0: 立即补齐的正确性保护
+
+1. 为 legacy LIST schema 增加真实读取 corpus：
+   - repeated primitive list
+   - `<list_name>_tuple` struct element
+   - repeated group with multiple children
+2. 为 optional MAP key 增加两类真实读取：
+   - optional key 但所有 key 非 null，读取成功
+   - optional key 且存在 null key，读取失败且错误信息包含 `contains null key`
+3. 增加 fake def/rep level materializer UT：
+   - list null/empty/null element/multi element
+   - map null/empty/null value/multi entry/null key
+4. 增加 skip/select 覆盖：
+   - legacy list corpus 上执行 skip/select
+   - map value list 或 list struct map list 上执行 select
+
+### P1: 组合路径保护
+
+1. Projection + predicate 同时命中同一 nested struct 的不同 child。
+2. Page index pruning 后读取 complex output column，验证没有 double skip。
+3. Row group statistics/dictionary pruning 后从后续 row group 读取 nested column。
+4. Reopen split 后 predicate projection、selection vector、page skip plan 不残留。
+
+### P2: 完整性和长期质量
+
+1. 建立 `parquet_v2_compat` corpus README，记录文件生成方式、writer 版本、schema、预期行为。
+2. 对 changed files 定期跑 coverage，关注 branch coverage，不只看 line coverage。
+3. 对 schema resolver 增加 table-driven case，减少散落 assert。
+4. 对 materializer 增加 fuzz/property-style 小范围测试：随机生成合法 list/map rows，转换为 def/rep levels 后读回比较原始 logical rows。
+
+## 测试数据构造建议
+
+### 动态生成数据
+
+适合：
+
+- Arrow 标准 schema
+- row group/page size 控制
+- dictionary/plain/page index/statistics 行为
+- type mapping 常规 case
+
+优点是无需维护二进制文件，case 可读性高。
+
+缺点是不能覆盖大量 legacy writer layout。
+
+### Golden parquet 文件
+
+适合：
+
+- Hive/Spark/parquet-mr legacy LIST/MAP schema
+- Arrow writer 不容易生成的 converted annotation
+- malformed-but-parseable 文件
+- 兼容性回归保护
+
+要求：
+
+1. 文件尽量小，通常 3 到 20 行。
+2. 配套 README 说明生成命令、writer 版本、schema、逻辑数据。
+3. 不在 UT 中依赖外部网络或外部服务。
+4. 预期结果在 C++ UT 中直接断言，SQL regression 的 `.out` 仍由 regression 脚本生成。
+
+### Fake reader 数据
+
+适合：
+
+- def/rep level 边界
+- corruption path
+- cursor/overflow 状态
+- non-nullable output 遇到 null
+
+要求：
+
+1. fake reader 只模拟 `ParquetColumnReader` 必需接口。
+2. 每个 case 明确输入 levels 和 expected logical rows。
+3. 错误 case 检查 `Status` 类型和关键错误文本。
+
+## 验收标准
+
+一个 new parquet reader 改动合入前，建议满足：
+
+1. 改动 schema resolver：至少补 L0；如果新增兼容能力，补 L2；如果用户可见，补 L4。
+2. 改动 list/map/struct reader：至少补 L1 和 L3；涉及 legacy layout 时补 L2。
+3. 改动 pruning/predicate/aggregate：至少补 L3；用户可见 SQL 语义补 L4。
+4. 改动 table reader mapping/schema evolution：至少补 `table_reader_test.cpp`，必要时补 L4。
+5. 新增 error handling：必须有负向 UT，不能只依赖代码审查。
+
+推荐执行命令：
+
+```bash
+./run-be-ut.sh --run '--filter=ParquetSchemaTest.*'
+./run-be-ut.sh --run '--filter=ParquetColumnReaderTest.*:NewParquetReaderTest.*:ParquetScanTest.*'
+./run-be-ut.sh --run '--filter=TableReaderTest.*'
+```
+
+对重要重构或发布前验证，建议执行：
+
+```bash
+./run-be-ut.sh --run '--filter=Parquet*:*TableReaderTest*' --coverage
+```
+
+如果本地工具链无法执行 UT，需要在提交说明或 PR 中明确说明失败原因，并在 CI 或可用环境补跑。
+
+## 不建议的方式
+
+1. 不建议用更多 schema-only case 替代真实读取 case。schema 正确不等于 reader 正确。
+2. 不建议只用 Arrow writer 动态生成文件证明 compatibility。兼容性问题通常来自非 Arrow writer。
+3. 不建议把所有复杂类型组合塞进一个巨大 fixture 后只断言少量输出。失败定位困难，覆盖意图不清晰。
+4. 不建议把内部 def/rep level 边界全部放到 SQL regression。执行慢、定位差、难覆盖异常路径。
+5. 不建议用 100% line coverage 作为合入门槛。更合理的是 changed branch coverage + 风险矩阵覆盖。
+
+## 最小落地计划
+
+第一阶段以 P0 为主，并顺带完成 schema case 的整理（第 4 项）：
+
+1. 新增 `parquet_nested_materializer_test.cpp`，覆盖 list/map def/rep 核心正常和异常路径。
+2. 新增 `be/test/exec/test_data/parquet_v2_compat/README.md` 和 4 到 6 个小型 golden parquet 文件。
+3. 新增 `parquet_compat_corpus_test.cpp`，对 golden 文件做 schema/full read/projection/skip/select 断言。
+4. 将现有 `parquet_schema_test.cpp` 中 LIST/MAP schema case 整理为 table-driven 或至少按类别分组。
+
+完成第一阶段后，才能较有信心地说 new parquet reader 的关键逻辑有有效测试保护；否则当前 UT 只能证明主路径和部分 schema 分支，不能充分发现 legacy compatibility 和 complex materialization 的问题。
diff --git a/docs/parquet-list-map-compat-design.md b/docs/parquet-list-map-compat-design.md
new file mode 100644
index 00000000000000..a02ca6e822aaf0
--- /dev/null
+++ b/docs/parquet-list-map-compat-design.md
@@ -0,0 +1,664 @@
+# Parquet LIST/MAP Compatibility Design
+
+本文描述如何参考 Arrow Parquet 的 LIST/MAP 兼容策略，在 Doris new parquet reader 中支持更多 Parquet 标准和 legacy 复杂类型 schema。
+
+目标不是改变 `ListColumnReader` / `MapColumnReader` 的读取模型，而是在 schema 构建阶段把不同物理 schema 归一化成 Doris 当前 reader 可以消费的统一 `ParquetColumnSchema` tree。
+
+## 背景
+
+Parquet 的复杂类型是通过 group schema、logical/converted annotation、definition levels 和 repetition levels 共同表达的。
+
+标准 LIST/MAP schema 比较明确，但历史 writer 产生过多种 legacy 形态。例如 LIST 可能缺少标准 `list.element` wrapper，MAP entry group 可能叫 `key_value`、`entries` 或其它名字。
+
+Arrow C++ 的处理思路是：
+
+1. 在 Parquet schema conversion 阶段识别标准和 legacy schema。
+2. 将这些 schema 归一化为 Arrow `ListType` / `MapType` / `StructType`。
+3. 后续 reader 只消费归一化后的 nested field tree，不在读取阶段继续判断 legacy schema 名字。
+
+Doris new parquet reader 应采用相同边界：
+
+1. `parquet_column_schema.cpp` 负责兼容不同 LIST/MAP physical schema。
+2. `ParquetColumnSchema` 输出统一的 LIST/MAP child tree。
+3. `ListColumnReader` / `MapColumnReader` / `ParquetLeafReader` 不感知 legacy schema 形态。
+
+## 当前 Doris 限制
+
+当前 `build_node_schema()` 的 LIST 分支只支持标准 3-level LIST：
+
+```text
+optional group a (LIST) {
+  repeated group list {
+    optional int32 element;
+  }
+}
+```
+
+当前限制：
+
+- outer LIST group 必须只有一个 child。
+- repeated child 必须是 group。
+- repeated group 必须只有一个 child。
+- 不支持 repeated primitive list。
+- 不支持 repeated group 多字段 struct element。
+- 不支持 `array` / `<parent>_tuple` 这类 legacy structural name。
+
+当前 MAP 分支支持标准 MAP 结构：
+
+```text
+optional group m (MAP) {
+  repeated group key_value {
+    required binary key;
+    optional int32 value;
+  }
+}
+```
+
+当前限制：
+
+- outer MAP group 必须只有一个 child。
+- entry child 必须 repeated group。
+- entry group 必须正好两个 children。
+- key 必须 required。
+- 不支持 key-only map。
+- 不支持没有 repeated entry layer 的非标准 MAP。
+
+## 设计原则
+
+1. 兼容逻辑只放在 schema 构建阶段。
+2. reader 层继续消费统一 schema tree。
+3. 不支持会改变 reader model 的格式，例如没有 repeated entry layer 的 MAP。
+4. 第一阶段不支持 key-only map，因为 Doris `ColumnMap` 需要 values column。
+5. 对容易误判的 schema 保持严格，避免把普通 struct 错解析成 LIST/MAP。
+6. 支持范围对齐 Arrow 的稳定 legacy compatibility 规则，而不是无限放宽。
+
+MAP projection 语义也保持收敛：
+
+- partial MAP projection 只表示 value subtree pruning，例如 `MAP<K, STRUCT<a,b>>` 投影 `value.b` 后输出 `MAP<K, STRUCT<b>>`。
+- key 不作为可裁剪 projection 子树。reader 始终读取完整 key stream，因为 key stream 决定 entry existence、offsets，并且 key 本身承载 MAP 的 key equality 语义。
+- schema projection 重建 `DataTypeMap` 时保留原始 key type，只根据 projected value child 重建 value type。
+
+## LIST 兼容规则
+
+对于 outer group annotated as `LIST`：
+
+```text
+optional group a (LIST) {
+  repeated ... repeated_child;
+}
+```
+
+先要求：
+
+- outer LIST group 必须只有一个 child。
+- child 必须是 repeated。
+
+然后根据 repeated child 形态判断 element schema node。
+
+### 1. 标准 3-level LIST
+
+```text
+optional group a (LIST) {
+  repeated group list {
+    optional int32 element;
+  }
+}
+```
+
+解析：
+
+- repeated child 是 wrapper。
+- element 是 wrapper 的唯一 child：`list.element`。
+- `ParquetColumnSchema(LIST).children[0]` 指向 element schema。
+
+### 2. Repeated primitive legacy LIST
+
+```text
+optional group a (LIST) {
+  repeated int32 element;
+}
+```
+
+解析：
+
+- repeated primitive 本身是 element。
+- element 本身不 nullable，因为 repeated primitive 不提供额外 optional element level。
+- array 自身 nullable 仍由 outer LIST group 决定。
+
+### 3. Repeated group as struct element
+
+```text
+optional group a (LIST) {
+  repeated group element {
+    optional int32 x;
+    optional binary y;
+  }
+}
+```
+
+解析：
+
+- repeated group 有多个 children。
+- repeated group 本身是 element。
+- element type 是 `STRUCT<x, y>`。
+
+### 4. Legacy structural name
+
+Arrow 会将某些名字视作 structural element，而不是标准 wrapper。
+
+```text
+optional group a (LIST) {
+  repeated group array {
+    optional int32 item;
+  }
+}
+```
+
+```text
+optional group a (LIST) {
+  repeated group a_tuple {
+    optional int32 item;
+  }
+}
+```
+
+解析：
+
+- repeated group 名为 `array`，或名为 `<list_name>_tuple`。
+- repeated group 本身是 element。
+- 即使它只有一个 child，也不要剥掉这一层。
+
+### 5. One-child repeated group wrapper
+
+```text
+optional group a (LIST) {
+  repeated group list {
+    optional int32 element;
+  }
+}
+```
+
+如果 repeated group 只有一个 child，且不是 legacy structural name，则按 wrapper 处理：
+
+- element 是 repeated group 的唯一 child。
+
+但这里不能只按 child 数量判断。需要额外保持 Arrow / parquet-format 的 backward compatibility 规则：
+
+- 如果 repeated group 自身带 `LIST` 或 `MAP` annotation，则 repeated group 本身是 element，不剥 wrapper。
+- 如果 repeated group 的唯一 child 也是 repeated，则 repeated group 本身是 element，不剥 wrapper。
+- 只有当 repeated group 无 logical annotation、唯一 child 非 repeated、且不是 legacy structural name 时，才把它当作标准 wrapper 剥掉。
+
+这样可以避免把 two-level `List<List<T>>`、two-level `List<Map<K, V>>` 或单字段 repeated struct element 错解析成少一层的结构。
+
+## LIST schema resolver
+
+建议在 `parquet_column_schema.cpp` 中新增 helper：
+
+```cpp
+struct ListElementResolution {
+    const parquet::schema::Node* repeated_node = nullptr;
+    const parquet::schema::Node* element_node = nullptr;
+    SchemaBuildContext repeated_context;
+    SchemaBuildContext element_context;
+    bool element_is_repeated_node = false;
+};
+
+Status resolve_list_element_node(
+        const parquet::SchemaDescriptor& schema,
+        const parquet::schema::GroupNode& list_group,
+        const SchemaBuildContext& list_context,
+        ListElementResolution* result);
+```
+
+Resolver 逻辑：
+
+```text
+if list_group.field_count != 1:
+    reject
+
+repeated_node = list_group.field(0)
+if !repeated_node.is_repeated:
+    reject
+
+repeated_context = child_context(list_context, repeated_node, 0)
+
+if repeated_node.is_primitive:
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+repeated_group = as_group(repeated_node)
+if repeated_group.field_count == 0:
+    reject
+
+if repeated_group.field_count > 1:
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+if has_structural_list_name(list_group.name, repeated_group.name):
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+if repeated_group has LIST or MAP annotation:
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+only_child = repeated_group.field(0)
+if only_child.is_repeated:
+    element_node = repeated_node
+    element_context = repeated_context
+    element_is_repeated_node = true
+    return
+
+element_node = only_child
+element_context = child_context(repeated_context, only_child, 0)
+element_is_repeated_node = false
+```
+
+`has_structural_list_name()` 对齐 Arrow 的 legacy rule：
+
+```text
+name == "array" || name == list_name + "_tuple"
+```
+
+## LIST schema build
+
+`build_node_schema()` 的 LIST 分支改为：
+
+```text
+resolve_list_element_node(...)
+
+column_schema.kind = LIST
+column_schema.definition_level = repeated_context.definition_level
+column_schema.repetition_level = repeated_context.repetition_level
+column_schema.repeated_repetition_level = repeated_context.repeated_repetition_level
+
+build child schema from resolved element_node and element_context
+column_schema.type = nullable_if_needed(DataTypeArray(child.type), list_node)
+column_schema.children = [child]
+propagate_child_levels(column_schema)
+```
+
+### repeated group itself as element
+
+当 element 是 repeated group 本身时，需要注意不要把这个 repeated group 再解释成一层 LIST。
+
+预期效果：
+
+```text
+optional group a (LIST) {
+  repeated group element {
+    optional int32 x;
+    optional binary y;
+  }
+}
+```
+
+应构造成：
+
+```text
+LIST
+  child: STRUCT<x, y>
+```
+
+而不是：
+
+```text
+LIST
+  child: LIST or extra repeated container
+```
+
+实现上可以新增一个 internal build mode：
+
+```cpp
+enum class SchemaBuildMode {
+    NORMAL,
+    REPEATED_GROUP_AS_LIST_ELEMENT,
+};
+```
+
+当 mode 是 `REPEATED_GROUP_AS_LIST_ELEMENT`：
+
+- 当前 repeated group 作为 element 本身构造成 STRUCT 或 annotated logical type。
+- 它的 repeated level 已经由 list entry 层消费，不再把 repeated 当作额外 array 层。
+- 如果当前 repeated group 是普通 group，则构造成 `STRUCT` element。
+- 如果当前 repeated group 带 `LIST` annotation，则继续按 LIST 解析它的 child repeated layer，构造成 nested list element。
+- 如果当前 repeated group 带 `MAP` 或 `MAP_KEY_VALUE` annotation，则继续按 MAP 解析它的 child repeated entry layer，构造成 map element。
+- 构造当前 element schema 时，不得再次因为“当前节点本身是 repeated”引入隐式 list；只有它内部的 child repeated layer 才能产生下一层 list/map repetition 语义。
+
+如果希望保持改动更小，也可以新增专用函数：
+
+```cpp
+Status build_repeated_group_as_list_element_schema(...);
+```
+
+该函数至少需要处理 repeated group 作为普通 struct element 的场景；如果选择不用通用 build mode，则还需要显式覆盖 repeated group annotated as LIST/MAP 的场景。
+
+## MAP 兼容规则
+
+对于 outer group annotated as `MAP` 或 legacy `MAP_KEY_VALUE`：
+
+```text
+optional group m (MAP) {
+  repeated group entries {
+    required binary key;
+    optional int32 value;
+  }
+}
+```
+
+支持：
+
+- 只有 outer group 带 `MAP` / `MAP_KEY_VALUE` annotation 时，才进入 MAP 兼容解析。
+- entry group 名字可以是 `key_value`、`entries` 或其它。
+- key/value 字段名不强制必须叫 `key` / `value`。
+- 第一个 child 是 key。
+- 第二个 child 是 value。
+- key 必须 required。
+- value 可以 required 或 optional。
+
+不支持：
+
+- outer MAP group 多个 children。
+- entry child 非 repeated。
+- entry child 是 primitive。
+- entry group 没有 value，即 key-only map。
+- 没有 repeated entry layer 的 MAP。
+- nullable key。
+
+## MAP schema resolver
+
+建议新增 helper：
+
+```cpp
+struct MapEntryResolution {
+    const parquet::schema::GroupNode* entry_group = nullptr;
+    SchemaBuildContext entry_context;
+};
+
+Status resolve_map_entry_group(
+        const parquet::schema::GroupNode& map_group,
+        const SchemaBuildContext& map_context,
+        MapEntryResolution* result);
+```
+
+Resolver 逻辑：
+
+```text
+if map_group.field_count != 1:
+    reject
+
+entry_node = map_group.field(0)
+if !entry_node.is_repeated:
+    reject
+if entry_node.is_primitive:
+    reject
+
+entry_group = as_group(entry_node)
+if entry_group.field_count != 2:
+    reject
+
+key_node = entry_group.field(0)
+value_node = entry_group.field(1)
+if key_node.repetition != REQUIRED:
+    reject
+
+entry_context = child_context(map_context, entry_node, 0)
+return
+```
+
+## MAP schema build
+
+`build_node_schema()` 的 MAP 分支应和 LIST 一样在 schema 构建阶段折叠物理 wrapper。
+`key_value` / `entries` / 任意合法 entry group 只用于解析 repeated entry level，不出现在
+最终 `ParquetColumnSchema.children` 中：
+
+```text
+MAP
+  child[0]: key
+  child[1]: value
+```
+
+构造流程：
+
+```text
+resolve_map_entry_group(...)
+
+column_schema.kind = MAP
+column_schema.definition_level = entry_context.definition_level
+column_schema.repetition_level = entry_context.repetition_level
+column_schema.repeated_repetition_level = entry_context.repeated_repetition_level
+
+build key child from entry_group.field(0)
+build value child from entry_group.field(1)
+
+column_schema.type = nullable_if_needed(DataTypeMap(nullable(key.type), nullable(value.type)), map_node)
+column_schema.children = [key_schema, value_schema]
+propagate_child_levels(column_schema)
+```
+
+这里保持 `MapColumnReader` 的直接 key/value 假设：
+
+- `column_schema.children[0]` 是 key。
+- `column_schema.children[1]` 是 value。
+- MAP node 自身保存 entry repeated group 的 `definition_level` / `repetition_level` /
+  `repeated_repetition_level`，用于 materialize offsets、null map 和 empty map。
+
+注意：`DataTypeMap` 中把 key type 包成 nullable 是 Doris nested column materialization 的内部类型约定，不代表 Parquet nullable key 被支持。Schema resolver 仍必须在 `key_node.repetition != REQUIRED` 时 reject。
+
+## 不支持 key-only map 的原因
+
+Key-only map 可能长这样：
+
+```text
+optional group m (MAP) {
+  repeated group entries {
+    required binary key;
+  }
+}
+```
+
+理论上可以解释为 set-like map 或 `MAP<K, NULL>`，但 Doris `ColumnMap` 需要 keys column 和 values column。
+
+若要支持，需要额外设计：
+
+- synthetic null value schema。
+- constant-null value reader。
+- `MapColumnReader` value stream 缺失时的特殊路径。
+
+这会改变 reader tree，不属于本次 schema compatibility 的最小范围。因此第一阶段明确 reject。
+
+## 不支持 no-entry MAP 的原因
+
+No-entry MAP 可能长这样：
+
+```text
+optional group m (MAP) {
+  required binary key;
+  optional int32 value;
+}
+```
+
+它缺少 repeated entry layer，因此没有 repetition level 可以表达多个 map entries，也无法生成 Doris `ColumnMap` offsets。
+
+这不是标准 MAP，也不是 Arrow 主要兼容的 legacy 形态。第一阶段应 reject。
+
+## 对 reader 层的影响
+
+预期不修改 reader 层核心逻辑。
+
+保持：
+
+- `ListColumnReader` 只读取 `column_schema.children[0]` 作为 element reader。
+- `MapColumnReader` 读取 `column_schema.children[0/1]` 作为 key/value reader。
+- `MapColumnReader` 对 partial MAP projection 只接受 value child projection，显式 key child projection 应 reject；即使只裁剪 value，reader 也必须完整读取 key stream。
+- `ParquetLeafReader` 只负责 leaf records/levels/values 读取和 batch materialization。
+- `nested_column_materializer.*` 只负责 Doris nested Column 构造 helper。
+
+风险点在 LIST repeated group as element：
+
+- 如果该 repeated group 是 struct element，需要确保 schema builder 不把 repeated group 再解释成一个额外 repeated container。
+- 这个风险应通过专用 build mode 或专用 helper 解决。
+
+## 错误处理策略
+
+错误信息应明确指出具体 unsupported schema 原因：
+
+- LIST outer group child count invalid。
+- LIST child is not repeated。
+- LIST repeated group has no child。
+- MAP outer group child count invalid。
+- MAP entry is not repeated group。
+- MAP entry child count is not 2。
+- MAP key is nullable。
+
+不要用过于笼统的 `Unsupported parquet LIST encoding` 覆盖所有错误，否则后续排查文件兼容性问题会困难。
+
+## 测试计划
+
+### LIST 正例
+
+1. 标准 3-level LIST：
+
+```text
+optional group a (LIST) {
+  repeated group list {
+    optional int32 element;
+  }
+}
+```
+
+2. Repeated primitive legacy LIST：
+
+```text
+optional group a (LIST) {
+  repeated int32 element;
+}
+```
+
+3. Repeated group struct element：
+
+```text
+optional group a (LIST) {
+  repeated group element {
+    optional int32 x;
+    optional binary y;
+  }
+}
+```
+
+4. Legacy `array` name：
+
+```text
+optional group a (LIST) {
+  repeated group array {
+    optional int32 item;
+  }
+}
+```
+
+5. Legacy `<parent>_tuple` name：
+
+```text
+optional group a (LIST) {
+  repeated group a_tuple {
+    optional int32 item;
+  }
+}
+```
+
+6. Repeated group annotated as nested LIST：
+
+```text
+optional group a (LIST) {
+  repeated group array (LIST) {
+    repeated int32 array;
+  }
+}
+```
+
+预期解析为 `ARRAY<ARRAY<INT>>`，不要剥掉 `array (LIST)` 这一层。
+
+7. Repeated group annotated as MAP：
+
+```text
+optional group a (LIST) {
+  repeated group array (MAP) {
+    repeated group key_value {
+      required binary key;
+      optional int32 value;
+    }
+  }
+}
+```
+
+预期解析为 `ARRAY<MAP<STRING, INT>>`，不要剥掉 `array (MAP)` 这一层。
+
+8. One-child repeated group whose child is repeated：
+
+```text
+optional group a (LIST) {
+  repeated group element {
+    repeated int32 items;
+  }
+}
+```
+
+预期 repeated group 本身是 struct element，解析为 `ARRAY<STRUCT<items: ARRAY<INT>>>`，不要把 `items` 提升成 list element。
+
+### LIST 反例
+
+1. outer LIST group 多 child。
+2. outer LIST child 非 repeated。
+3. repeated group 无 child。
+4. repeated 的 LIST-annotated outer group（即 outer LIST group 自身为 repeated），除非它作为另一个 two-level LIST 的 element 被专门支持。
+
+### MAP 正例
+
+1. 标准 `key_value` entry group。
+2. `entries` entry group name。
+3. entry group 任意名字，但结构为 repeated group，包含 required key 和 required/optional value。
+4. `MAP_KEY_VALUE` legacy converted type。
+5. key/value 字段名非 `key`/`value`，但位置正确。
+
+### MAP 反例
+
+1. nullable key。
+2. outer MAP group 多 child。
+3. entry child 非 repeated。
+4. entry child 是 primitive。
+5. key-only map。
+6. no-entry MAP。
+
+## 实施步骤
+
+1. 在 `parquet_column_schema.cpp` 增加 LIST helper：
+   - `has_structural_list_name()`
+   - `resolve_list_element_node()`
+   - 必要时增加 repeated group as element 的 build helper。
+2. 改造 LIST 分支，输出统一 `ParquetColumnSchemaKind::LIST` schema tree。
+3. 增加 LIST schema/unit/regression 测试。
+   - 覆盖 repeated primitive、multi-field struct element、`array` / `<parent>_tuple` structural name。
+   - 覆盖 two-level `List<List<T>>`、two-level `List<Map<K, V>>`、单 child repeated group 且 child repeated 的 struct element。
+   - read 测试至少覆盖 null list、empty list、单元素、多元素，验证 def/rep materialization。
+4. 增加 MAP helper：
+   - `resolve_map_entry_group()`
+5. 改造 MAP 分支，放宽 entry group 名字限制，但保持 key/value 结构严格，并在 schema build 阶段折叠 entry wrapper，输出 `MAP -> key,value`。
+6. 增加 MAP schema/unit/regression 测试。
+   - 覆盖 entry group 名字兼容。
+   - 覆盖 `ParquetColumnSchema(MAP).children == [key, value]`。
+   - 覆盖 partial MAP projection 只允许 value child，key child projection reject。
+7. 如后续确有需求，再单独设计 key-only map 或 key subtree projection 支持。
+
+## 预期收益
+
+- 支持更多由 Arrow、Spark、Hive、旧 Parquet writer 产生的 LIST/MAP schema。
+- 兼容逻辑集中在 schema builder，reader 层保持稳定。
+- 为后续 complex parquet reader 的兼容性测试建立清晰边界。
diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
index b40ff54fbd829c..cdb3d1a7ed06d7 100644
--- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
+++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java
@@ -24,6 +24,7 @@
 import org.apache.doris.common.security.authentication.PreExecutionAuthenticatorCache;
 
 import com.google.common.base.Preconditions;
+import org.apache.paimon.CoreOptions;
 import org.apache.paimon.data.InternalRow;
 import org.apache.paimon.predicate.Predicate;
 import org.apache.paimon.reader.RecordReader;
@@ -37,6 +38,7 @@
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.TimeZone;
@@ -234,6 +236,8 @@ protected TableSchema parseTableSchema() throws UnsupportedOperationException {
     private void initTable() {
         Preconditions.checkState(params.containsKey("serialized_table"));
         table = PaimonUtils.deserialize(params.get("serialized_table"));
+        table = table.copy(Collections.singletonMap(
+                CoreOptions.READ_BATCH_SIZE.key(), String.valueOf(batchSize)));
         paimonAllFieldNames = PaimonUtils.getFieldNames(this.table.rowType());
         if (LOG.isDebugEnabled()) {
             LOG.debug("paimonAllFieldNames:{}", paimonAllFieldNames);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
index 1d234377d83251..e1311237a603d5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java
@@ -1253,6 +1253,9 @@ public Map<String, SysTable> getSupportedSysTables() {
     public TFileFormatType getFileFormatType(SessionVariable sessionVariable) throws UserException {
         TFileFormatType type = null;
         Table table = getRemoteTable();
+        // Hive itself only supports mixing ORC and Parquet files in one table across different partitions.
+        // If ORC and Parquet files are mixed within the same partition, the read will fail.
+        // The table-level input format is used here, so BE will regard all files in the table as one format.
         String inputFormatName = table.getSd().getInputFormat();
         String hiveFormat = HiveMetaStoreClientHelper.HiveFileFormat.getFormat(inputFormatName);
         if (hiveFormat.equals(HiveMetaStoreClientHelper.HiveFileFormat.PARQUET.getDesc())) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
index 17a742b835a4fb..27698c2d1f9700 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java
@@ -47,6 +47,7 @@
 import org.apache.doris.thrift.TFileRangeDesc;
 import org.apache.doris.thrift.TPaimonDeletionFileDesc;
 import org.apache.doris.thrift.TPaimonFileDesc;
+import org.apache.doris.thrift.TPaimonReaderType;
 import org.apache.doris.thrift.TPushAggOp;
 import org.apache.doris.thrift.TTableFormatFileDesc;
 
@@ -262,8 +263,10 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit)
             rangeDesc.setFormatType(TFileFormatType.FORMAT_JNI);
             // Use Paimon native serialization for paimon-cpp reader
             if (sessionVariable.isEnablePaimonCppReader() && split instanceof DataSplit) {
+                fileDesc.setReaderType(TPaimonReaderType.PAIMON_CPP);
                 fileDesc.setPaimonSplit(PaimonUtil.encodeDataSplitToString((DataSplit) split));
             } else {
+                fileDesc.setReaderType(TPaimonReaderType.PAIMON_JNI);
                 fileDesc.setPaimonSplit(PaimonUtil.encodeObjectToString(split));
             }
             // Set table location for paimon-cpp reader
@@ -274,6 +277,7 @@ private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit)
             rangeDesc.setSelfSplitWeight(paimonSplit.getSelfSplitWeight());
         } else {
             // use native reader
+            fileDesc.setReaderType(TPaimonReaderType.PAIMON_NATIVE);
             if (fileFormat.equals("orc")) {
                 rangeDesc.setFormatType(TFileFormatType.FORMAT_ORC);
             } else if (fileFormat.equals("parquet")) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 845030a37c163a..77813e2d62b75f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -96,6 +96,7 @@ public class SessionVariable implements Serializable, Writable {
     public static final String SCAN_QUEUE_MEM_LIMIT = "scan_queue_mem_limit";
     public static final String MAX_SCANNERS_CONCURRENCY = "max_scanners_concurrency";
     public static final String MAX_FILE_SCANNERS_CONCURRENCY = "max_file_scanners_concurrency";
+    public static final String ENABLE_FILE_SCANNER_V2 = "enable_file_scanner_v2";
     public static final String MIN_SCANNERS_CONCURRENCY = "min_scanners_concurrency";
     public static final String MIN_FILE_SCANNERS_CONCURRENCY = "min_file_scanners_concurrency";
     public static final String MIN_SCAN_SCHEDULER_CONCURRENCY = "min_scan_scheduler_concurrency";
@@ -1146,6 +1147,11 @@ public static double getHotValueThreshold() {
             "FileScanNode 扫描数据的最大并发，默认为 16", "The max threads to read data of FileScanNode, default 16"})
     public int maxFileScannersConcurrency = 16;
 
+    @VarAttrDef.VarAttr(name = ENABLE_FILE_SCANNER_V2, needForward = true, description = {
+            "开启后 FileScanNode 会在支持的查询场景使用 FileScannerV2，默认开启",
+            "When enabled, FileScanNode uses FileScannerV2 for supported query scans. Enabled by default."})
+    public boolean enableFileScannerV2 = true;
+
     @VarAttrDef.VarAttr(name = LOCAL_EXCHANGE_FREE_BLOCKS_LIMIT)
     public int localExchangeFreeBlocksLimit = 4;
 
@@ -2987,10 +2993,9 @@ public static boolean isEagerAggregationOnJoin() {
     public static final String ENABLE_MC_LIMIT_SPLIT_OPTIMIZATION = "enable_mc_limit_split_optimization";
     @VarAttrDef.VarAttr(
             name = ENABLE_EXTERNAL_TABLE_BATCH_MODE,
-            fuzzy = true,
             description = {"使能外表的 batch mode 功能", "Enable the batch mode function of the external table."},
             needForward = true)
-    public boolean enableExternalTableBatchMode = true;
+    public boolean enableExternalTableBatchMode = false;
 
     @VarAttrDef.VarAttr(
             name = ENABLE_MC_LIMIT_SPLIT_OPTIMIZATION,
@@ -3954,13 +3959,6 @@ private void setFuzzyForCatalog(Random random) {
         this.hiveTextCompression = Util.getRandomString(
                 "gzip", "defalte", "bzip2", "zstd", "lz4", "lzo", "snappy", "plain");
 
-        // batch mode
-        this.enableExternalTableBatchMode = random.nextBoolean();
-        if (this.enableExternalTableBatchMode) {
-            this.numPartitionsInBatchMode = Util.getRandomInt(0, 1024, Integer.MAX_VALUE);
-            this.numFilesInBatchMode = Util.getRandomInt(0, 1024, Integer.MAX_VALUE);
-        }
-
         // common
         this.enableCountPushDownForExternalTable = random.nextBoolean();
     }
@@ -5552,6 +5550,7 @@ public TQueryOptions toThrift() {
         tResult.setScanQueueMemLimit(maxScanQueueMemByte);
         tResult.setMaxScannersConcurrency(maxScannersConcurrency);
         tResult.setMaxFileScannersConcurrency(maxFileScannersConcurrency);
+        tResult.setEnableFileScannerV2(enableFileScannerV2);
         tResult.setMaxColumnReaderNum(maxColumnReaderNum);
         tResult.setParallelPrepareThreshold(parallelPrepareThreshold);
         tResult.setMinScannersConcurrency(minScannersConcurrency);
diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java
index 370e4965765854..4d140b2ba57037 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/property/fileformat/ParquetFileFormatPropertiesTest.java
@@ -47,6 +47,7 @@ public void testAnalyzeFileFormatProperties() {
 
         Assert.assertEquals(TParquetCompressionType.SNAPPY, parquetFileFormatProperties.getParquetCompressionType());
         Assert.assertEquals(false, parquetFileFormatProperties.isParquetDisableDictionary());
+        Assert.assertTrue(parquetFileFormatProperties.isEnableInt96Timestamps());
     }
 
     @Test
@@ -139,6 +140,7 @@ public void testFullTResultFileSinkOptions() {
         parquetFileFormatProperties.fullTResultFileSinkOptions(sinkOptions);
         Assert.assertEquals(parquetFileFormatProperties.getParquetCompressionType(), sinkOptions.getParquetCompressionType());
         Assert.assertEquals(parquetFileFormatProperties.isParquetDisableDictionary(), sinkOptions.isParquetDisableDictionary());
+        Assert.assertEquals(parquetFileFormatProperties.isEnableInt96Timestamps(), sinkOptions.isEnableInt96Timestamps());
     }
 
     @Test
diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift
index c17199d74edf91..a17cd140c93418 100644
--- a/gensrc/thrift/Exprs.thrift
+++ b/gensrc/thrift/Exprs.thrift
@@ -88,6 +88,10 @@ enum TExprNodeType {
   TRY_CAST_EXPR = 41
   // for search DSL function
   SEARCH_EXPR = 42,
+  // Normal predicate expression
+  PREDICATE = 43,
+  // Normal literal
+  LITERAL = 44,
 }
 
 //enum TAggregationOp {
diff --git a/gensrc/thrift/Opcodes.thrift b/gensrc/thrift/Opcodes.thrift
index 1e4002357e7599..a2d709799482eb 100644
--- a/gensrc/thrift/Opcodes.thrift
+++ b/gensrc/thrift/Opcodes.thrift
@@ -97,4 +97,6 @@ enum TExprOpcode {
     MATCH_REGEXP = 76,
     MATCH_PHRASE_EDGE = 77,
     TRY_CAST = 78,
+    // Delete operator from Iceberg/Paimon
+    DELETE = 79,
 }
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index cd2292ca6b63c3..7b628a70e6982d 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -509,6 +509,7 @@ struct TQueryOptions {
   // In read path, read from file cache or remote storage when execute query.
   1000: optional bool disable_file_cache = false
   1001: optional i32 file_cache_query_limit_percent = -1
+  1002: optional bool enable_file_scanner_v2 = false
 }
 
 
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index fb8ef30150e27a..acd0e3975bbd8b 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -355,6 +355,12 @@ struct TPaimonDeletionFileDesc {
     3: optional i64 length;
 }
 
+enum TPaimonReaderType {
+    PAIMON_NATIVE = 0,
+    PAIMON_JNI = 1,
+    PAIMON_CPP = 2,
+}
+
 struct TPaimonFileDesc {
     1: optional string paimon_split
     2: optional string paimon_column_names
@@ -372,6 +378,8 @@ struct TPaimonFileDesc {
     14: optional string paimon_table  // deprecated
     15: optional i64 row_count // deprecated
     16: optional i64 schema_id; // for schema change.
+    // Reader implementation for logical paimon split. Native file split uses range format type.
+    17: optional TPaimonReaderType reader_type;
 }
 
 struct TTrinoConnectorFileDesc {
diff --git a/regression-test/data/export_p0/export/test_show_export.out b/regression-test/data/export_p0/export/test_show_export.out
index 90277ca28f2a9f..eb2d2ab154b1b3 100644
--- a/regression-test/data/export_p0/export/test_show_export.out
+++ b/regression-test/data/export_p0/export/test_show_export.out
@@ -102,156 +102,156 @@
 99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99	99.99	99.99	char99	99
 
 -- !select_load1 --
-1	2017-10-01	2017-10-01T00:00	Beijing	1	1	true	1	1	1.1	1.1	char1	1	1
-10	2017-10-01	2017-10-01T00:00	Beijing	10	10	true	10	10	10.1	10.1	char10	10	10
+1	2017-10-01	2017-10-01T00:00	Beijing	1	1	true	1	1	1.1	1.1	char1	1.000000000	1
+10	2017-10-01	2017-10-01T00:00	Beijing	10	10	true	10	10	10.1	10.1	char10	10.000000000	10
 100	2017-10-01	2017-10-01T00:00	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N
-11	2017-10-01	2017-10-01T00:00	Beijing	11	11	true	11	11	11.11	11.11	char11	11	11
-12	2017-10-01	2017-10-01T00:00	Beijing	12	12	true	12	12	12.12	12.12	char12	12	12
-13	2017-10-01	2017-10-01T00:00	Beijing	13	13	true	13	13	13.13	13.13	char13	13	13
-14	2017-10-01	2017-10-01T00:00	Beijing	14	14	true	14	14	14.14	14.14	char14	14	14
-15	2017-10-01	2017-10-01T00:00	Beijing	15	15	true	15	15	15.15	15.15	char15	15	15
-16	2017-10-01	2017-10-01T00:00	Beijing	16	16	true	16	16	16.16	16.16	char16	16	16
-17	2017-10-01	2017-10-01T00:00	Beijing	17	17	true	17	17	17.17	17.17	char17	17	17
-18	2017-10-01	2017-10-01T00:00	Beijing	18	18	true	18	18	18.18	18.18	char18	18	18
-19	2017-10-01	2017-10-01T00:00	Beijing	19	19	true	19	19	19.19	19.19	char19	19	19
-2	2017-10-01	2017-10-01T00:00	Beijing	2	2	true	2	2	2.2	2.2	char2	2	2
-20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20	20
-21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21	21
-22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22	22
-23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23	23
-24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24	24
-25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25	25
-26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26	26
-27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27	27
-28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28	28
-29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29	29
-3	2017-10-01	2017-10-01T00:00	Beijing	3	3	true	3	3	3.3	3.3	char3	3	3
-30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30	30
-31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31	31
-32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32	32
-33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33	33
-34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34	34
-35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35	35
-36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36	36
-37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37	37
-38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38	38
-39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39	39
-4	2017-10-01	2017-10-01T00:00	Beijing	4	4	true	4	4	4.4	4.4	char4	4	4
-40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40	40
-41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41	41
-42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42	42
-43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43	43
-44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44	44
-45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45	45
-46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46	46
-47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47	47
-48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48	48
-49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49	49
-5	2017-10-01	2017-10-01T00:00	Beijing	5	5	true	5	5	5.5	5.5	char5	5	5
-50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50	50
-51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51	51
-52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52	52
-53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53	53
-54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54	54
-55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55	55
-56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56	56
-57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57	57
-58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58	58
-59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59	59
-6	2017-10-01	2017-10-01T00:00	Beijing	6	6	true	6	6	6.6	6.6	char6	6	6
-60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60	60
-61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61	61
-62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62	62
-63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63	63
-64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64	64
-65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65	65
-66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66	66
-67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67	67
-68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68	68
-69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69	69
-7	2017-10-01	2017-10-01T00:00	Beijing	7	7	true	7	7	7.7	7.7	char7	7	7
-70	2017-10-01	2017-10-01T00:00	Beijing	70	70	true	70	70	70.7	70.7	char70	70	70
-71	2017-10-01	2017-10-01T00:00	Beijing	71	71	true	71	71	71.71	71.71	char71	71	71
-72	2017-10-01	2017-10-01T00:00	Beijing	72	72	true	72	72	72.72	72.72	char72	72	72
-73	2017-10-01	2017-10-01T00:00	Beijing	73	73	true	73	73	73.73	73.73	char73	73	73
-74	2017-10-01	2017-10-01T00:00	Beijing	74	74	true	74	74	74.74	74.74	char74	74	74
-75	2017-10-01	2017-10-01T00:00	Beijing	75	75	true	75	75	75.75	75.75	char75	75	75
-76	2017-10-01	2017-10-01T00:00	Beijing	76	76	true	76	76	76.76	76.76	char76	76	76
-77	2017-10-01	2017-10-01T00:00	Beijing	77	77	true	77	77	77.77	77.77	char77	77	77
-78	2017-10-01	2017-10-01T00:00	Beijing	78	78	true	78	78	78.78	78.78	char78	78	78
-79	2017-10-01	2017-10-01T00:00	Beijing	79	79	true	79	79	79.79	79.79	char79	79	79
-8	2017-10-01	2017-10-01T00:00	Beijing	8	8	true	8	8	8.8	8.8	char8	8	8
-80	2017-10-01	2017-10-01T00:00	Beijing	80	80	true	80	80	80.8	80.8	char80	80	80
-81	2017-10-01	2017-10-01T00:00	Beijing	81	81	true	81	81	81.81	81.81	char81	81	81
-82	2017-10-01	2017-10-01T00:00	Beijing	82	82	true	82	82	82.82	82.82	char82	82	82
-83	2017-10-01	2017-10-01T00:00	Beijing	83	83	true	83	83	83.83	83.83	char83	83	83
-84	2017-10-01	2017-10-01T00:00	Beijing	84	84	true	84	84	84.84	84.84	char84	84	84
-85	2017-10-01	2017-10-01T00:00	Beijing	85	85	true	85	85	85.85	85.85	char85	85	85
-86	2017-10-01	2017-10-01T00:00	Beijing	86	86	true	86	86	86.86	86.86	char86	86	86
-87	2017-10-01	2017-10-01T00:00	Beijing	87	87	true	87	87	87.87	87.87	char87	87	87
-88	2017-10-01	2017-10-01T00:00	Beijing	88	88	true	88	88	88.88	88.88	char88	88	88
-89	2017-10-01	2017-10-01T00:00	Beijing	89	89	true	89	89	89.89	89.89	char89	89	89
-9	2017-10-01	2017-10-01T00:00	Beijing	9	9	true	9	9	9.9	9.9	char9	9	9
-90	2017-10-01	2017-10-01T00:00	Beijing	90	90	true	90	90	90.9	90.9	char90	90	90
-91	2017-10-01	2017-10-01T00:00	Beijing	91	91	true	91	91	91.91	91.91	char91	91	91
-92	2017-10-01	2017-10-01T00:00	Beijing	92	92	true	92	92	92.92	92.92	char92	92	92
-93	2017-10-01	2017-10-01T00:00	Beijing	93	93	true	93	93	93.93	93.93	char93	93	93
-94	2017-10-01	2017-10-01T00:00	Beijing	94	94	true	94	94	94.94	94.94	char94	94	94
-95	2017-10-01	2017-10-01T00:00	Beijing	95	95	true	95	95	95.95	95.95	char95	95	95
-96	2017-10-01	2017-10-01T00:00	Beijing	96	96	true	96	96	96.96	96.96	char96	96	96
-97	2017-10-01	2017-10-01T00:00	Beijing	97	97	true	97	97	97.97	97.97	char97	97	97
-98	2017-10-01	2017-10-01T00:00	Beijing	98	98	true	98	98	98.98	98.98	char98	98	98
-99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99.99	99.99	char99	99	99
+11	2017-10-01	2017-10-01T00:00	Beijing	11	11	true	11	11	11.11	11.11	char11	11.000000000	11
+12	2017-10-01	2017-10-01T00:00	Beijing	12	12	true	12	12	12.12	12.12	char12	12.000000000	12
+13	2017-10-01	2017-10-01T00:00	Beijing	13	13	true	13	13	13.13	13.13	char13	13.000000000	13
+14	2017-10-01	2017-10-01T00:00	Beijing	14	14	true	14	14	14.14	14.14	char14	14.000000000	14
+15	2017-10-01	2017-10-01T00:00	Beijing	15	15	true	15	15	15.15	15.15	char15	15.000000000	15
+16	2017-10-01	2017-10-01T00:00	Beijing	16	16	true	16	16	16.16	16.16	char16	16.000000000	16
+17	2017-10-01	2017-10-01T00:00	Beijing	17	17	true	17	17	17.17	17.17	char17	17.000000000	17
+18	2017-10-01	2017-10-01T00:00	Beijing	18	18	true	18	18	18.18	18.18	char18	18.000000000	18
+19	2017-10-01	2017-10-01T00:00	Beijing	19	19	true	19	19	19.19	19.19	char19	19.000000000	19
+2	2017-10-01	2017-10-01T00:00	Beijing	2	2	true	2	2	2.2	2.2	char2	2.000000000	2
+20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20.000000000	20
+21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21.000000000	21
+22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22.000000000	22
+23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23.000000000	23
+24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24.000000000	24
+25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25.000000000	25
+26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26.000000000	26
+27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27.000000000	27
+28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28.000000000	28
+29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29.000000000	29
+3	2017-10-01	2017-10-01T00:00	Beijing	3	3	true	3	3	3.3	3.3	char3	3.000000000	3
+30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30.000000000	30
+31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31.000000000	31
+32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32.000000000	32
+33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33.000000000	33
+34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34.000000000	34
+35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35.000000000	35
+36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36.000000000	36
+37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37.000000000	37
+38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38.000000000	38
+39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39.000000000	39
+4	2017-10-01	2017-10-01T00:00	Beijing	4	4	true	4	4	4.4	4.4	char4	4.000000000	4
+40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40.000000000	40
+41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41.000000000	41
+42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42.000000000	42
+43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43.000000000	43
+44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44.000000000	44
+45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45.000000000	45
+46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46.000000000	46
+47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47.000000000	47
+48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48.000000000	48
+49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49.000000000	49
+5	2017-10-01	2017-10-01T00:00	Beijing	5	5	true	5	5	5.5	5.5	char5	5.000000000	5
+50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50.000000000	50
+51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51.000000000	51
+52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52.000000000	52
+53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53.000000000	53
+54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54.000000000	54
+55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55.000000000	55
+56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56.000000000	56
+57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57.000000000	57
+58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58.000000000	58
+59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59.000000000	59
+6	2017-10-01	2017-10-01T00:00	Beijing	6	6	true	6	6	6.6	6.6	char6	6.000000000	6
+60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60.000000000	60
+61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61.000000000	61
+62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62.000000000	62
+63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63.000000000	63
+64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64.000000000	64
+65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65.000000000	65
+66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66.000000000	66
+67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67.000000000	67
+68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68.000000000	68
+69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69.000000000	69
+7	2017-10-01	2017-10-01T00:00	Beijing	7	7	true	7	7	7.7	7.7	char7	7.000000000	7
+70	2017-10-01	2017-10-01T00:00	Beijing	70	70	true	70	70	70.7	70.7	char70	70.000000000	70
+71	2017-10-01	2017-10-01T00:00	Beijing	71	71	true	71	71	71.71	71.71	char71	71.000000000	71
+72	2017-10-01	2017-10-01T00:00	Beijing	72	72	true	72	72	72.72	72.72	char72	72.000000000	72
+73	2017-10-01	2017-10-01T00:00	Beijing	73	73	true	73	73	73.73	73.73	char73	73.000000000	73
+74	2017-10-01	2017-10-01T00:00	Beijing	74	74	true	74	74	74.74	74.74	char74	74.000000000	74
+75	2017-10-01	2017-10-01T00:00	Beijing	75	75	true	75	75	75.75	75.75	char75	75.000000000	75
+76	2017-10-01	2017-10-01T00:00	Beijing	76	76	true	76	76	76.76	76.76	char76	76.000000000	76
+77	2017-10-01	2017-10-01T00:00	Beijing	77	77	true	77	77	77.77	77.77	char77	77.000000000	77
+78	2017-10-01	2017-10-01T00:00	Beijing	78	78	true	78	78	78.78	78.78	char78	78.000000000	78
+79	2017-10-01	2017-10-01T00:00	Beijing	79	79	true	79	79	79.79	79.79	char79	79.000000000	79
+8	2017-10-01	2017-10-01T00:00	Beijing	8	8	true	8	8	8.8	8.8	char8	8.000000000	8
+80	2017-10-01	2017-10-01T00:00	Beijing	80	80	true	80	80	80.8	80.8	char80	80.000000000	80
+81	2017-10-01	2017-10-01T00:00	Beijing	81	81	true	81	81	81.81	81.81	char81	81.000000000	81
+82	2017-10-01	2017-10-01T00:00	Beijing	82	82	true	82	82	82.82	82.82	char82	82.000000000	82
+83	2017-10-01	2017-10-01T00:00	Beijing	83	83	true	83	83	83.83	83.83	char83	83.000000000	83
+84	2017-10-01	2017-10-01T00:00	Beijing	84	84	true	84	84	84.84	84.84	char84	84.000000000	84
+85	2017-10-01	2017-10-01T00:00	Beijing	85	85	true	85	85	85.85	85.85	char85	85.000000000	85
+86	2017-10-01	2017-10-01T00:00	Beijing	86	86	true	86	86	86.86	86.86	char86	86.000000000	86
+87	2017-10-01	2017-10-01T00:00	Beijing	87	87	true	87	87	87.87	87.87	char87	87.000000000	87
+88	2017-10-01	2017-10-01T00:00	Beijing	88	88	true	88	88	88.88	88.88	char88	88.000000000	88
+89	2017-10-01	2017-10-01T00:00	Beijing	89	89	true	89	89	89.89	89.89	char89	89.000000000	89
+9	2017-10-01	2017-10-01T00:00	Beijing	9	9	true	9	9	9.9	9.9	char9	9.000000000	9
+90	2017-10-01	2017-10-01T00:00	Beijing	90	90	true	90	90	90.9	90.9	char90	90.000000000	90
+91	2017-10-01	2017-10-01T00:00	Beijing	91	91	true	91	91	91.91	91.91	char91	91.000000000	91
+92	2017-10-01	2017-10-01T00:00	Beijing	92	92	true	92	92	92.92	92.92	char92	92.000000000	92
+93	2017-10-01	2017-10-01T00:00	Beijing	93	93	true	93	93	93.93	93.93	char93	93.000000000	93
+94	2017-10-01	2017-10-01T00:00	Beijing	94	94	true	94	94	94.94	94.94	char94	94.000000000	94
+95	2017-10-01	2017-10-01T00:00	Beijing	95	95	true	95	95	95.95	95.95	char95	95.000000000	95
+96	2017-10-01	2017-10-01T00:00	Beijing	96	96	true	96	96	96.96	96.96	char96	96.000000000	96
+97	2017-10-01	2017-10-01T00:00	Beijing	97	97	true	97	97	97.97	97.97	char97	97.000000000	97
+98	2017-10-01	2017-10-01T00:00	Beijing	98	98	true	98	98	98.98	98.98	char98	98.000000000	98
+99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99.99	99.99	char99	99.000000000	99
 
 -- !select_load1 --
-20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20	20
-21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21	21
-22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22	22
-23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23	23
-24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24	24
-25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25	25
-26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26	26
-27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27	27
-28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28	28
-29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29	29
-30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30	30
-31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31	31
-32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32	32
-33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33	33
-34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34	34
-35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35	35
-36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36	36
-37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37	37
-38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38	38
-39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39	39
-40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40	40
-41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41	41
-42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42	42
-43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43	43
-44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44	44
-45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45	45
-46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46	46
-47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47	47
-48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48	48
-49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49	49
-50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50	50
-51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51	51
-52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52	52
-53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53	53
-54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54	54
-55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55	55
-56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56	56
-57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57	57
-58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58	58
-59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59	59
-60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60	60
-61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61	61
-62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62	62
-63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63	63
-64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64	64
-65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65	65
-66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66	66
-67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67	67
-68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68	68
-69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69	69
+20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20.000000000	20
+21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21.000000000	21
+22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22.000000000	22
+23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23.000000000	23
+24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24.000000000	24
+25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25.000000000	25
+26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26.000000000	26
+27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27.000000000	27
+28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28.000000000	28
+29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29.000000000	29
+30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30.000000000	30
+31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31.000000000	31
+32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32.000000000	32
+33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33.000000000	33
+34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34.000000000	34
+35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35.000000000	35
+36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36.000000000	36
+37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37.000000000	37
+38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38.000000000	38
+39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39.000000000	39
+40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40.000000000	40
+41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41.000000000	41
+42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42.000000000	42
+43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43.000000000	43
+44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44.000000000	44
+45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45.000000000	45
+46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46.000000000	46
+47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47.000000000	47
+48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48.000000000	48
+49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49.000000000	49
+50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50.000000000	50
+51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51.000000000	51
+52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52.000000000	52
+53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53.000000000	53
+54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54.000000000	54
+55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55.000000000	55
+56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56.000000000	56
+57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57.000000000	57
+58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58.000000000	58
+59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59.000000000	59
+60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60.000000000	60
+61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61.000000000	61
+62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62.000000000	62
+63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63.000000000	63
+64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64.000000000	64
+65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65.000000000	65
+66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66.000000000	66
+67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67.000000000	67
+68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68.000000000	68
+69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69.000000000	69
 
diff --git a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
index c8ff8cafdd9854..cd7fe1e40fdb2d 100644
--- a/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
+++ b/regression-test/data/export_p0/outfile/parquet/test_outfile_parquet_complex_type.out
@@ -127,3 +127,15 @@
 9	doris_9	{"user_id":9, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":9, "sex":9, "bool_col":1, "int_col":9, "bigint_col":9, "largeint_col":"9", "float_col":9.9, "double_col":9.9, "char_col":"char9_1234", "decimal_col":9.000000000}
 10	doris_10	{"user_id":10, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":null, "age":null, "sex":null, "bool_col":null, "int_col":null, "bigint_col":null, "largeint_col":null, "float_col":null, "double_col":null, "char_col":null, "decimal_col":null}
 
+-- !select_load7 --
+1	doris_1	{"user_id":1, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":1, "sex":1, "bool_col":1, "int_col":1, "bigint_col":1, "largeint_col":"1", "float_col":1.1, "double_col":1.1, "char_col":"char1_1234", "decimal_col":1.000000000}
+2	doris_2	{"user_id":2, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":2, "sex":2, "bool_col":1, "int_col":2, "bigint_col":2, "largeint_col":"2", "float_col":2.2, "double_col":2.2, "char_col":"char2_1234", "decimal_col":2.000000000}
+3	doris_3	{"user_id":3, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":3, "sex":3, "bool_col":1, "int_col":3, "bigint_col":3, "largeint_col":"3", "float_col":3.3, "double_col":3.3, "char_col":"char3_1234", "decimal_col":3.000000000}
+4	doris_4	{"user_id":4, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":4, "sex":4, "bool_col":1, "int_col":4, "bigint_col":4, "largeint_col":"4", "float_col":4.4, "double_col":4.4, "char_col":"char4_1234", "decimal_col":4.000000000}
+5	doris_5	{"user_id":5, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":5, "sex":5, "bool_col":1, "int_col":5, "bigint_col":5, "largeint_col":"5", "float_col":5.5, "double_col":5.5, "char_col":"char5_1234", "decimal_col":5.000000000}
+6	doris_6	{"user_id":6, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":6, "sex":6, "bool_col":1, "int_col":6, "bigint_col":6, "largeint_col":"6", "float_col":6.6, "double_col":6.6, "char_col":"char6_1234", "decimal_col":6.000000000}
+7	doris_7	{"user_id":7, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":7, "sex":7, "bool_col":1, "int_col":7, "bigint_col":7, "largeint_col":"7", "float_col":7.7, "double_col":7.7, "char_col":"char7_1234", "decimal_col":7.000000000}
+8	doris_8	{"user_id":8, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":8, "sex":8, "bool_col":1, "int_col":8, "bigint_col":8, "largeint_col":"8", "float_col":8.8, "double_col":8.800000000000001, "char_col":"char8_1234", "decimal_col":8.000000000}
+9	doris_9	{"user_id":9, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":"Beijing", "age":9, "sex":9, "bool_col":1, "int_col":9, "bigint_col":9, "largeint_col":"9", "float_col":9.9, "double_col":9.9, "char_col":"char9_1234", "decimal_col":9.000000000}
+10	doris_10	{"user_id":10, "date":"2017-10-01", "datetime":"2017-10-01 00:00:00.000000", "city":null, "age":null, "sex":null, "bool_col":null, "int_col":null, "bigint_col":null, "largeint_col":null, "float_col":null, "double_col":null, "char_col":null, "decimal_col":null}
+
diff --git a/regression-test/data/export_p0/test_export_parquet.out b/regression-test/data/export_p0/test_export_parquet.out
index c3358efa4a97af..941dd4469a66c8 100644
--- a/regression-test/data/export_p0/test_export_parquet.out
+++ b/regression-test/data/export_p0/test_export_parquet.out
@@ -102,104 +102,104 @@
 99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99	99.99	99.99	char99	99	0.0.0.99	::99
 
 -- !select_load1 --
-1	2017-10-01	2017-10-01T00:00	Beijing	1	1	true	1	1	1.1	1.1	char1	1	1	1	::1
-10	2017-10-01	2017-10-01T00:00	Beijing	10	10	true	10	10	10.1	10.1	char10	10	10	10	::10
+1	2017-10-01	2017-10-01T00:00	Beijing	1	1	true	1	1	1.1	1.1	char1	1.000000000	1	1	::1
+10	2017-10-01	2017-10-01T00:00	Beijing	10	10	true	10	10	10.1	10.1	char10	10.000000000	10	10	::10
 100	2017-10-01	2017-10-01T00:00	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N
-11	2017-10-01	2017-10-01T00:00	Beijing	11	11	true	11	11	11.11	11.11	char11	11	11	11	::11
-12	2017-10-01	2017-10-01T00:00	Beijing	12	12	true	12	12	12.12	12.12	char12	12	12	12	::12
-13	2017-10-01	2017-10-01T00:00	Beijing	13	13	true	13	13	13.13	13.13	char13	13	13	13	::13
-14	2017-10-01	2017-10-01T00:00	Beijing	14	14	true	14	14	14.14	14.14	char14	14	14	14	::14
-15	2017-10-01	2017-10-01T00:00	Beijing	15	15	true	15	15	15.15	15.15	char15	15	15	15	::15
-16	2017-10-01	2017-10-01T00:00	Beijing	16	16	true	16	16	16.16	16.16	char16	16	16	16	::16
-17	2017-10-01	2017-10-01T00:00	Beijing	17	17	true	17	17	17.17	17.17	char17	17	17	17	::17
-18	2017-10-01	2017-10-01T00:00	Beijing	18	18	true	18	18	18.18	18.18	char18	18	18	18	::18
-19	2017-10-01	2017-10-01T00:00	Beijing	19	19	true	19	19	19.19	19.19	char19	19	19	19	::19
-2	2017-10-01	2017-10-01T00:00	Beijing	2	2	true	2	2	2.2	2.2	char2	2	2	2	::2
-20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20	20	20	::20
-21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21	21	21	::21
-22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22	22	22	::22
-23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23	23	23	::23
-24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24	24	24	::24
-25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25	25	25	::25
-26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26	26	26	::26
-27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27	27	27	::27
-28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28	28	28	::28
-29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29	29	29	::29
-3	2017-10-01	2017-10-01T00:00	Beijing	3	3	true	3	3	3.3	3.3	char3	3	3	3	::3
-30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30	30	30	::30
-31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31	31	31	::31
-32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32	32	32	::32
-33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33	33	33	::33
-34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34	34	34	::34
-35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35	35	35	::35
-36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36	36	36	::36
-37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37	37	37	::37
-38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38	38	38	::38
-39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39	39	39	::39
-4	2017-10-01	2017-10-01T00:00	Beijing	4	4	true	4	4	4.4	4.4	char4	4	4	4	::4
-40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40	40	40	::40
-41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41	41	41	::41
-42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42	42	42	::42
-43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43	43	43	::43
-44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44	44	44	::44
-45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45	45	45	::45
-46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46	46	46	::46
-47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47	47	47	::47
-48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48	48	48	::48
-49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49	49	49	::49
-5	2017-10-01	2017-10-01T00:00	Beijing	5	5	true	5	5	5.5	5.5	char5	5	5	5	::5
-50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50	50	50	::50
-51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51	51	51	::51
-52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52	52	52	::52
-53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53	53	53	::53
-54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54	54	54	::54
-55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55	55	55	::55
-56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56	56	56	::56
-57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57	57	57	::57
-58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58	58	58	::58
-59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59	59	59	::59
-6	2017-10-01	2017-10-01T00:00	Beijing	6	6	true	6	6	6.6	6.6	char6	6	6	6	::6
-60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60	60	60	::60
-61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61	61	61	::61
-62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62	62	62	::62
-63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63	63	63	::63
-64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64	64	64	::64
-65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65	65	65	::65
-66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66	66	66	::66
-67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67	67	67	::67
-68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68	68	68	::68
-69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69	69	69	::69
-7	2017-10-01	2017-10-01T00:00	Beijing	7	7	true	7	7	7.7	7.7	char7	7	7	7	::7
-70	2017-10-01	2017-10-01T00:00	Beijing	70	70	true	70	70	70.7	70.7	char70	70	70	70	::70
-71	2017-10-01	2017-10-01T00:00	Beijing	71	71	true	71	71	71.71	71.71	char71	71	71	71	::71
-72	2017-10-01	2017-10-01T00:00	Beijing	72	72	true	72	72	72.72	72.72	char72	72	72	72	::72
-73	2017-10-01	2017-10-01T00:00	Beijing	73	73	true	73	73	73.73	73.73	char73	73	73	73	::73
-74	2017-10-01	2017-10-01T00:00	Beijing	74	74	true	74	74	74.74	74.74	char74	74	74	74	::74
-75	2017-10-01	2017-10-01T00:00	Beijing	75	75	true	75	75	75.75	75.75	char75	75	75	75	::75
-76	2017-10-01	2017-10-01T00:00	Beijing	76	76	true	76	76	76.76	76.76	char76	76	76	76	::76
-77	2017-10-01	2017-10-01T00:00	Beijing	77	77	true	77	77	77.77	77.77	char77	77	77	77	::77
-78	2017-10-01	2017-10-01T00:00	Beijing	78	78	true	78	78	78.78	78.78	char78	78	78	78	::78
-79	2017-10-01	2017-10-01T00:00	Beijing	79	79	true	79	79	79.79	79.79	char79	79	79	79	::79
-8	2017-10-01	2017-10-01T00:00	Beijing	8	8	true	8	8	8.8	8.8	char8	8	8	8	::8
-80	2017-10-01	2017-10-01T00:00	Beijing	80	80	true	80	80	80.8	80.8	char80	80	80	80	::80
-81	2017-10-01	2017-10-01T00:00	Beijing	81	81	true	81	81	81.81	81.81	char81	81	81	81	::81
-82	2017-10-01	2017-10-01T00:00	Beijing	82	82	true	82	82	82.82	82.82	char82	82	82	82	::82
-83	2017-10-01	2017-10-01T00:00	Beijing	83	83	true	83	83	83.83	83.83	char83	83	83	83	::83
-84	2017-10-01	2017-10-01T00:00	Beijing	84	84	true	84	84	84.84	84.84	char84	84	84	84	::84
-85	2017-10-01	2017-10-01T00:00	Beijing	85	85	true	85	85	85.85	85.85	char85	85	85	85	::85
-86	2017-10-01	2017-10-01T00:00	Beijing	86	86	true	86	86	86.86	86.86	char86	86	86	86	::86
-87	2017-10-01	2017-10-01T00:00	Beijing	87	87	true	87	87	87.87	87.87	char87	87	87	87	::87
-88	2017-10-01	2017-10-01T00:00	Beijing	88	88	true	88	88	88.88	88.88	char88	88	88	88	::88
-89	2017-10-01	2017-10-01T00:00	Beijing	89	89	true	89	89	89.89	89.89	char89	89	89	89	::89
-9	2017-10-01	2017-10-01T00:00	Beijing	9	9	true	9	9	9.9	9.9	char9	9	9	9	::9
-90	2017-10-01	2017-10-01T00:00	Beijing	90	90	true	90	90	90.9	90.9	char90	90	90	90	::90
-91	2017-10-01	2017-10-01T00:00	Beijing	91	91	true	91	91	91.91	91.91	char91	91	91	91	::91
-92	2017-10-01	2017-10-01T00:00	Beijing	92	92	true	92	92	92.92	92.92	char92	92	92	92	::92
-93	2017-10-01	2017-10-01T00:00	Beijing	93	93	true	93	93	93.93	93.93	char93	93	93	93	::93
-94	2017-10-01	2017-10-01T00:00	Beijing	94	94	true	94	94	94.94	94.94	char94	94	94	94	::94
-95	2017-10-01	2017-10-01T00:00	Beijing	95	95	true	95	95	95.95	95.95	char95	95	95	95	::95
-96	2017-10-01	2017-10-01T00:00	Beijing	96	96	true	96	96	96.96	96.96	char96	96	96	96	::96
-97	2017-10-01	2017-10-01T00:00	Beijing	97	97	true	97	97	97.97	97.97	char97	97	97	97	::97
-98	2017-10-01	2017-10-01T00:00	Beijing	98	98	true	98	98	98.98	98.98	char98	98	98	98	::98
-99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99.99	99.99	char99	99	99	99	::99
+11	2017-10-01	2017-10-01T00:00	Beijing	11	11	true	11	11	11.11	11.11	char11	11.000000000	11	11	::11
+12	2017-10-01	2017-10-01T00:00	Beijing	12	12	true	12	12	12.12	12.12	char12	12.000000000	12	12	::12
+13	2017-10-01	2017-10-01T00:00	Beijing	13	13	true	13	13	13.13	13.13	char13	13.000000000	13	13	::13
+14	2017-10-01	2017-10-01T00:00	Beijing	14	14	true	14	14	14.14	14.14	char14	14.000000000	14	14	::14
+15	2017-10-01	2017-10-01T00:00	Beijing	15	15	true	15	15	15.15	15.15	char15	15.000000000	15	15	::15
+16	2017-10-01	2017-10-01T00:00	Beijing	16	16	true	16	16	16.16	16.16	char16	16.000000000	16	16	::16
+17	2017-10-01	2017-10-01T00:00	Beijing	17	17	true	17	17	17.17	17.17	char17	17.000000000	17	17	::17
+18	2017-10-01	2017-10-01T00:00	Beijing	18	18	true	18	18	18.18	18.18	char18	18.000000000	18	18	::18
+19	2017-10-01	2017-10-01T00:00	Beijing	19	19	true	19	19	19.19	19.19	char19	19.000000000	19	19	::19
+2	2017-10-01	2017-10-01T00:00	Beijing	2	2	true	2	2	2.2	2.2	char2	2.000000000	2	2	::2
+20	2017-10-01	2017-10-01T00:00	Beijing	20	20	true	20	20	20.2	20.2	char20	20.000000000	20	20	::20
+21	2017-10-01	2017-10-01T00:00	Beijing	21	21	true	21	21	21.21	21.21	char21	21.000000000	21	21	::21
+22	2017-10-01	2017-10-01T00:00	Beijing	22	22	true	22	22	22.22	22.22	char22	22.000000000	22	22	::22
+23	2017-10-01	2017-10-01T00:00	Beijing	23	23	true	23	23	23.23	23.23	char23	23.000000000	23	23	::23
+24	2017-10-01	2017-10-01T00:00	Beijing	24	24	true	24	24	24.24	24.24	char24	24.000000000	24	24	::24
+25	2017-10-01	2017-10-01T00:00	Beijing	25	25	true	25	25	25.25	25.25	char25	25.000000000	25	25	::25
+26	2017-10-01	2017-10-01T00:00	Beijing	26	26	true	26	26	26.26	26.26	char26	26.000000000	26	26	::26
+27	2017-10-01	2017-10-01T00:00	Beijing	27	27	true	27	27	27.27	27.27	char27	27.000000000	27	27	::27
+28	2017-10-01	2017-10-01T00:00	Beijing	28	28	true	28	28	28.28	28.28	char28	28.000000000	28	28	::28
+29	2017-10-01	2017-10-01T00:00	Beijing	29	29	true	29	29	29.29	29.29	char29	29.000000000	29	29	::29
+3	2017-10-01	2017-10-01T00:00	Beijing	3	3	true	3	3	3.3	3.3	char3	3.000000000	3	3	::3
+30	2017-10-01	2017-10-01T00:00	Beijing	30	30	true	30	30	30.3	30.3	char30	30.000000000	30	30	::30
+31	2017-10-01	2017-10-01T00:00	Beijing	31	31	true	31	31	31.31	31.31	char31	31.000000000	31	31	::31
+32	2017-10-01	2017-10-01T00:00	Beijing	32	32	true	32	32	32.32	32.32	char32	32.000000000	32	32	::32
+33	2017-10-01	2017-10-01T00:00	Beijing	33	33	true	33	33	33.33	33.33	char33	33.000000000	33	33	::33
+34	2017-10-01	2017-10-01T00:00	Beijing	34	34	true	34	34	34.34	34.34	char34	34.000000000	34	34	::34
+35	2017-10-01	2017-10-01T00:00	Beijing	35	35	true	35	35	35.35	35.35	char35	35.000000000	35	35	::35
+36	2017-10-01	2017-10-01T00:00	Beijing	36	36	true	36	36	36.36	36.36	char36	36.000000000	36	36	::36
+37	2017-10-01	2017-10-01T00:00	Beijing	37	37	true	37	37	37.37	37.37	char37	37.000000000	37	37	::37
+38	2017-10-01	2017-10-01T00:00	Beijing	38	38	true	38	38	38.38	38.38	char38	38.000000000	38	38	::38
+39	2017-10-01	2017-10-01T00:00	Beijing	39	39	true	39	39	39.39	39.39	char39	39.000000000	39	39	::39
+4	2017-10-01	2017-10-01T00:00	Beijing	4	4	true	4	4	4.4	4.4	char4	4.000000000	4	4	::4
+40	2017-10-01	2017-10-01T00:00	Beijing	40	40	true	40	40	40.4	40.4	char40	40.000000000	40	40	::40
+41	2017-10-01	2017-10-01T00:00	Beijing	41	41	true	41	41	41.41	41.41	char41	41.000000000	41	41	::41
+42	2017-10-01	2017-10-01T00:00	Beijing	42	42	true	42	42	42.42	42.42	char42	42.000000000	42	42	::42
+43	2017-10-01	2017-10-01T00:00	Beijing	43	43	true	43	43	43.43	43.43	char43	43.000000000	43	43	::43
+44	2017-10-01	2017-10-01T00:00	Beijing	44	44	true	44	44	44.44	44.44	char44	44.000000000	44	44	::44
+45	2017-10-01	2017-10-01T00:00	Beijing	45	45	true	45	45	45.45	45.45	char45	45.000000000	45	45	::45
+46	2017-10-01	2017-10-01T00:00	Beijing	46	46	true	46	46	46.46	46.46	char46	46.000000000	46	46	::46
+47	2017-10-01	2017-10-01T00:00	Beijing	47	47	true	47	47	47.47	47.47	char47	47.000000000	47	47	::47
+48	2017-10-01	2017-10-01T00:00	Beijing	48	48	true	48	48	48.48	48.48	char48	48.000000000	48	48	::48
+49	2017-10-01	2017-10-01T00:00	Beijing	49	49	true	49	49	49.49	49.49	char49	49.000000000	49	49	::49
+5	2017-10-01	2017-10-01T00:00	Beijing	5	5	true	5	5	5.5	5.5	char5	5.000000000	5	5	::5
+50	2017-10-01	2017-10-01T00:00	Beijing	50	50	true	50	50	50.5	50.5	char50	50.000000000	50	50	::50
+51	2017-10-01	2017-10-01T00:00	Beijing	51	51	true	51	51	51.51	51.51	char51	51.000000000	51	51	::51
+52	2017-10-01	2017-10-01T00:00	Beijing	52	52	true	52	52	52.52	52.52	char52	52.000000000	52	52	::52
+53	2017-10-01	2017-10-01T00:00	Beijing	53	53	true	53	53	53.53	53.53	char53	53.000000000	53	53	::53
+54	2017-10-01	2017-10-01T00:00	Beijing	54	54	true	54	54	54.54	54.54	char54	54.000000000	54	54	::54
+55	2017-10-01	2017-10-01T00:00	Beijing	55	55	true	55	55	55.55	55.55	char55	55.000000000	55	55	::55
+56	2017-10-01	2017-10-01T00:00	Beijing	56	56	true	56	56	56.56	56.56	char56	56.000000000	56	56	::56
+57	2017-10-01	2017-10-01T00:00	Beijing	57	57	true	57	57	57.57	57.57	char57	57.000000000	57	57	::57
+58	2017-10-01	2017-10-01T00:00	Beijing	58	58	true	58	58	58.58	58.58	char58	58.000000000	58	58	::58
+59	2017-10-01	2017-10-01T00:00	Beijing	59	59	true	59	59	59.59	59.59	char59	59.000000000	59	59	::59
+6	2017-10-01	2017-10-01T00:00	Beijing	6	6	true	6	6	6.6	6.6	char6	6.000000000	6	6	::6
+60	2017-10-01	2017-10-01T00:00	Beijing	60	60	true	60	60	60.6	60.6	char60	60.000000000	60	60	::60
+61	2017-10-01	2017-10-01T00:00	Beijing	61	61	true	61	61	61.61	61.61	char61	61.000000000	61	61	::61
+62	2017-10-01	2017-10-01T00:00	Beijing	62	62	true	62	62	62.62	62.62	char62	62.000000000	62	62	::62
+63	2017-10-01	2017-10-01T00:00	Beijing	63	63	true	63	63	63.63	63.63	char63	63.000000000	63	63	::63
+64	2017-10-01	2017-10-01T00:00	Beijing	64	64	true	64	64	64.64	64.64	char64	64.000000000	64	64	::64
+65	2017-10-01	2017-10-01T00:00	Beijing	65	65	true	65	65	65.65	65.65	char65	65.000000000	65	65	::65
+66	2017-10-01	2017-10-01T00:00	Beijing	66	66	true	66	66	66.66	66.66	char66	66.000000000	66	66	::66
+67	2017-10-01	2017-10-01T00:00	Beijing	67	67	true	67	67	67.67	67.67	char67	67.000000000	67	67	::67
+68	2017-10-01	2017-10-01T00:00	Beijing	68	68	true	68	68	68.68	68.68	char68	68.000000000	68	68	::68
+69	2017-10-01	2017-10-01T00:00	Beijing	69	69	true	69	69	69.69	69.69	char69	69.000000000	69	69	::69
+7	2017-10-01	2017-10-01T00:00	Beijing	7	7	true	7	7	7.7	7.7	char7	7.000000000	7	7	::7
+70	2017-10-01	2017-10-01T00:00	Beijing	70	70	true	70	70	70.7	70.7	char70	70.000000000	70	70	::70
+71	2017-10-01	2017-10-01T00:00	Beijing	71	71	true	71	71	71.71	71.71	char71	71.000000000	71	71	::71
+72	2017-10-01	2017-10-01T00:00	Beijing	72	72	true	72	72	72.72	72.72	char72	72.000000000	72	72	::72
+73	2017-10-01	2017-10-01T00:00	Beijing	73	73	true	73	73	73.73	73.73	char73	73.000000000	73	73	::73
+74	2017-10-01	2017-10-01T00:00	Beijing	74	74	true	74	74	74.74	74.74	char74	74.000000000	74	74	::74
+75	2017-10-01	2017-10-01T00:00	Beijing	75	75	true	75	75	75.75	75.75	char75	75.000000000	75	75	::75
+76	2017-10-01	2017-10-01T00:00	Beijing	76	76	true	76	76	76.76	76.76	char76	76.000000000	76	76	::76
+77	2017-10-01	2017-10-01T00:00	Beijing	77	77	true	77	77	77.77	77.77	char77	77.000000000	77	77	::77
+78	2017-10-01	2017-10-01T00:00	Beijing	78	78	true	78	78	78.78	78.78	char78	78.000000000	78	78	::78
+79	2017-10-01	2017-10-01T00:00	Beijing	79	79	true	79	79	79.79	79.79	char79	79.000000000	79	79	::79
+8	2017-10-01	2017-10-01T00:00	Beijing	8	8	true	8	8	8.8	8.8	char8	8.000000000	8	8	::8
+80	2017-10-01	2017-10-01T00:00	Beijing	80	80	true	80	80	80.8	80.8	char80	80.000000000	80	80	::80
+81	2017-10-01	2017-10-01T00:00	Beijing	81	81	true	81	81	81.81	81.81	char81	81.000000000	81	81	::81
+82	2017-10-01	2017-10-01T00:00	Beijing	82	82	true	82	82	82.82	82.82	char82	82.000000000	82	82	::82
+83	2017-10-01	2017-10-01T00:00	Beijing	83	83	true	83	83	83.83	83.83	char83	83.000000000	83	83	::83
+84	2017-10-01	2017-10-01T00:00	Beijing	84	84	true	84	84	84.84	84.84	char84	84.000000000	84	84	::84
+85	2017-10-01	2017-10-01T00:00	Beijing	85	85	true	85	85	85.85	85.85	char85	85.000000000	85	85	::85
+86	2017-10-01	2017-10-01T00:00	Beijing	86	86	true	86	86	86.86	86.86	char86	86.000000000	86	86	::86
+87	2017-10-01	2017-10-01T00:00	Beijing	87	87	true	87	87	87.87	87.87	char87	87.000000000	87	87	::87
+88	2017-10-01	2017-10-01T00:00	Beijing	88	88	true	88	88	88.88	88.88	char88	88.000000000	88	88	::88
+89	2017-10-01	2017-10-01T00:00	Beijing	89	89	true	89	89	89.89	89.89	char89	89.000000000	89	89	::89
+9	2017-10-01	2017-10-01T00:00	Beijing	9	9	true	9	9	9.9	9.9	char9	9.000000000	9	9	::9
+90	2017-10-01	2017-10-01T00:00	Beijing	90	90	true	90	90	90.9	90.9	char90	90.000000000	90	90	::90
+91	2017-10-01	2017-10-01T00:00	Beijing	91	91	true	91	91	91.91	91.91	char91	91.000000000	91	91	::91
+92	2017-10-01	2017-10-01T00:00	Beijing	92	92	true	92	92	92.92	92.92	char92	92.000000000	92	92	::92
+93	2017-10-01	2017-10-01T00:00	Beijing	93	93	true	93	93	93.93	93.93	char93	93.000000000	93	93	::93
+94	2017-10-01	2017-10-01T00:00	Beijing	94	94	true	94	94	94.94	94.94	char94	94.000000000	94	94	::94
+95	2017-10-01	2017-10-01T00:00	Beijing	95	95	true	95	95	95.95	95.95	char95	95.000000000	95	95	::95
+96	2017-10-01	2017-10-01T00:00	Beijing	96	96	true	96	96	96.96	96.96	char96	96.000000000	96	96	::96
+97	2017-10-01	2017-10-01T00:00	Beijing	97	97	true	97	97	97.97	97.97	char97	97.000000000	97	97	::97
+98	2017-10-01	2017-10-01T00:00	Beijing	98	98	true	98	98	98.98	98.98	char98	98.000000000	98	98	::98
+99	2017-10-01	2017-10-01T00:00	Beijing	99	99	true	99	99	99.99	99.99	char99	99.000000000	99	99	::99
 
diff --git a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out
index 59e94ef9429ec9..784ad963ce4a72 100644
--- a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out
+++ b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.out
@@ -30,14 +30,14 @@
 8	nereids	\N
 
 -- !select_base2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1.000000000	1.000000000	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.940656458412465e-324	char2	100000000.000000000	100000000.000000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.402823E38	1.797693134862316e+308	char3	999999999.000000000	999999999.000000000	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 
 -- !select_tvf2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1.000000000	1.000000000	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.940656458412465e-324	char2	100000000.000000000	100000000.000000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.402823E38	1.797693134862316e+308	char3	999999999.000000000	999999999.000000000	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 
 -- !hive_docker_02 --
 1	2023-04-20	2023-04-20	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
@@ -75,14 +75,14 @@
 8	nereids	\N
 
 -- !select_base2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1.000000000	1.000000000	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.940656458412465e-324	char2	100000000.000000000	100000000.000000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.402823E38	1.797693134862316e+308	char3	999999999.000000000	999999999.000000000	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 
 -- !select_tvf2 --
-1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
-2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.9E-324	char2	100000000	100000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
-3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.4028235e+38	1.7976931348623157E308	char3	999999999	999999999	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
+1	2023-04-20	2023-04-20	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	2023-04-20T00:00	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1.000000000	1.000000000	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
+2	9999-12-31	9999-12-31	9999-12-31T23:59:59	9999-12-31T23:59:59	2023-04-20T00:00:00.120	2023-04-20T00:00:00.334400		Haidian	-32768	-128	true	-2147483648	-9223372036854775808	-170141183460469231731687303715884105728	1.4E-45	4.940656458412465e-324	char2	100000000.000000000	100000000.000000000	4	0.1	0.99999999	9999999999.9999999999	99999999999999999999999999999999999999	9.9999999999999999999999999999999999999	0.99999999999999999999999999999999999999
+3	2023-04-21	2023-04-21	2023-04-20T12:34:56	2023-04-20T00:00	2023-04-20T00:00:00.123	2023-04-20T00:00:00.123456	Beijing		32767	127	true	2147483647	9223372036854775807	170141183460469231731687303715884105727	3.402823E38	1.797693134862316e+308	char3	999999999.000000000	999999999.000000000	9	0.9	9.99999999	1234567890.0123456789	12345678901234567890123456789012345678	1.2345678901234567890123456789012345678	0.12345678901234567890123456789012345678
 
 -- !hive_docker_02 --
 1	2023-04-20	2023-04-20	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	2023-04-19 16:00:00.0	Beijing	Haidian	1	1	true	1	1	1	1.1	1.1	char1	1	1	1	0.1	1.00000000	1.0000000000	1	1.0000000000000000000000000000000000000	0.10000000000000000000000000000000000000
diff --git a/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.out b/regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.out
similarity index 100%
rename from regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.out
rename to regression-test/data/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.out
diff --git a/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out b/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out
index 160c99248fe90c..9adea59bbfba3e 100644
--- a/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out
+++ b/regression-test/data/external_table_p0/hive/ddl/test_hive_ctas.out
@@ -199,203 +199,3 @@ true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
 -- !hive_docker_ctas_types_02 --
 true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
 
--- !ctas_01 --
-2
-3
-
--- !hive_docker_ctas_01 --
-2
-3
-
--- !ctas_02 --
-2
-3
-
--- !hive_docker_ctas_02 --
-2
-3
-
--- !ctas_03 --
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_03 --
-22	value_for_pt11	value_for_pt22
-
--- !ctas_04 --
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_04 --
-22	value_for_pt11	value_for_pt22
-
--- !ctas_05 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_05 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !ctas_06 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_06 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !ctas_ex01 --
-2
-3
-
--- !hive_docker_ctas_ex01 --
-2
-3
-
--- !ctas_ex02 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	\N
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_ex02 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	__HIVE_DEFAULT_PARTITION__
-22	value_for_pt11	value_for_pt22
-
--- !ctas_03 --
-\N	another string value for col2
-\N	string value for col2
-\N	yet another string value for col2
-
--- !hive_docker_ctas_ex03 --
-\N	another string value for col2
-\N	string value for col2
-\N	yet another string value for col2
-
--- !ctas_04 --
-\N	11	value_for_pt1
-\N	22	value_for_pt11
-
--- !hive_docker_ctas_ex04 --
-\N	11	value_for_pt1
-\N	22	value_for_pt11
-
--- !qualified_table1 --
-11	value_for_pt1
-22	value_for_pt11
-
--- !qualified_table2 --
-11	value_for_pt1
-22	value_for_pt11
-
--- !ctas_types_01 --
-true	127	32767	2147483647	9223372036854775807	default	22.12345	3.141592653	99999.9999	default	default	2023-05-29	2023-05-29T23:19:34
-
--- !hive_docker_ctas_types_01 --
-true	127	32767	2147483647	9223372036854775807	default	22.12345	3.141592653	99999.9999	default	default	2023-05-29	2023-05-29 23:19:34.0
-
--- !ctas_types_02 --
-true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
-
--- !hive_docker_ctas_types_02 --
-true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
-
--- !ctas_01 --
-2
-3
-
--- !hive_docker_ctas_01 --
-2
-3
-
--- !ctas_02 --
-2
-3
-
--- !hive_docker_ctas_02 --
-2
-3
-
--- !ctas_03 --
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_03 --
-22	value_for_pt11	value_for_pt22
-
--- !ctas_04 --
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_04 --
-22	value_for_pt11	value_for_pt22
-
--- !ctas_05 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_05 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !ctas_06 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_06 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	value_for_pt22
-
--- !ctas_ex01 --
-2
-3
-
--- !hive_docker_ctas_ex01 --
-2
-3
-
--- !ctas_ex02 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	\N
-22	value_for_pt11	value_for_pt22
-
--- !hive_docker_ctas_ex02 --
-11	value_for_pt1	value_for_pt2
-22	value_for_pt11	__HIVE_DEFAULT_PARTITION__
-22	value_for_pt11	value_for_pt22
-
--- !ctas_03 --
-\N	another string value for col2
-\N	string value for col2
-\N	yet another string value for col2
-
--- !hive_docker_ctas_ex03 --
-\N	another string value for col2
-\N	string value for col2
-\N	yet another string value for col2
-
--- !ctas_04 --
-\N	11	value_for_pt1
-\N	22	value_for_pt11
-
--- !hive_docker_ctas_ex04 --
-\N	11	value_for_pt1
-\N	22	value_for_pt11
-
--- !qualified_table1 --
-11	value_for_pt1
-22	value_for_pt11
-
--- !qualified_table2 --
-11	value_for_pt1
-22	value_for_pt11
-
--- !ctas_types_01 --
-true	127	32767	2147483647	9223372036854775807	default	22.12345	3.141592653	99999.9999	default	default	2023-05-29	2023-05-29T23:19:34
-
--- !hive_docker_ctas_types_01 --
-true	127	32767	2147483647	9223372036854775807	default	22.12345	3.141592653	99999.9999	default	default	2023-05-29	2023-05-29 23:19:34.0
-
--- !ctas_types_02 --
-true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
-
--- !hive_docker_ctas_types_02 --
-true	127	32767	2147483647	default	22.12345	3.141592653	99999.9999	default
-
diff --git a/regression-test/data/external_table_p0/hive/test_complex_types.out b/regression-test/data/external_table_p0/hive/test_complex_types.out
index 4a9dbbe835c7fc..5f81514ea59b23 100644
--- a/regression-test/data/external_table_p0/hive/test_complex_types.out
+++ b/regression-test/data/external_table_p0/hive/test_complex_types.out
@@ -47,51 +47,3 @@
 -- !date_dict --
 2036-12-28	1898-12-28	2539-12-28
 
--- !null_element_at --
-0
-
--- !map_key_select --
-38111	0.770169659057425
-
--- !map_keys --
-["9wXr9n-TBm9Wyt-r8H-SkAq", "CPDH4G-ZXGPkku-3wY-ktaQ", "RvNlMt-HHjHN5M-VjP-xHAI", "qKIhKy-Ws344os-haX-2pmT", "DOJJ5l-UEkwVMs-x9F-HifD", "m871g8-1eFi7jt-oBq-S0yc", "wXugVP-v2fc6IF-DeU-On3T", "B0mXFX-QvgUgo7-Dih-6rDu", "E9zv3F-xMqSbMa-il4-FuDg", "msuFIN-ZkKO8TY-tu4-veH0", "0rSUyl-Un07aIW-KAx-WHnX", "XvbmO8-WA6oAqc-ihc-s8IL", "G6B6RD-AicAlZb-16u-Pn1I", "coDK0Q-tMg1294-JMQ-ZWQu", "4c0aWh-yhL6BOX-rRu-1n0r", "G4iUcG-ZhWw62v-VLt-n6lH", "IIB7qD-WQistwT-Vux-0c9B", "7cTyuR-5ssXm2S-sJR-JTIZ", "3KPhSW-FICEImf-bba-PCiQ", "qQ7Yup-XBeQGFz-3EP-q0vd", "gjRxRo-Af9Oqx5-IzN-3B9d", "1zSj57-nNZpZ0b-ZKn-BeY0", "sTK0mn-wkp1Xp5-PRS-txVM", "sLrM0s-1KnXLb6-1A3-Z1vJ", "UkYdkP-k7YKiKS-Fxp-qAcI", "v8p0YV-R5pAKZ8-UMr-P1bQ", "RJdTav-jk3os9Z-yRk-WhwV", "lB91ic-pNFZkE4-hBx-e104", "gmRV6e-GKJUg0L-ok7-J6Lz", "o3LUyz-7Toh54O-czG-Xep8", "8fzHhM-4otPAss-qTm-phg8", "kZsHhe-vfClpAR-b3H-7aHl", "TdZnlG-BUgMs7Z-iBM-9c3v", "RipJXn-p4gZkyy-1ZY-xkWe", "ke730M-LmMjGdc-EFy-0LUK", "jBSExJ-GXTc5TB-NSa-xBEd", "kI7Cc8-DSg5RdF-qLo-2bhe", "bAn3VI-x6xXWpB-zWe-G5CJ", "jAil30-kbt6K6z-kbr-8foB", "IHIwNs-1QGqy8l-i8i-vu4G", "p0IbZr-tHCtwiV-0hq-NtIt", "iggdij-M3YNBpd-yiD-a8Ro", "BrJEww-C4LpgaS-AeB-So4U", "xnO3Fi-8rXcpgj-zpm-EmuX", "5w57da-phYtDUx-px2-6frG", "31MfFs-1WyUAr6-gQ0-xLxY", "ryBl2p-rSoPhwd-WPv-NCAU", "KN5TEt-gOfJ4Hy-3pp-HiBa", "ytqxb8-utXXjUf-m41-i6ir", "WhGUGz-zzyvEpD-9BM-2bVf", "dE1tFe-zHClt4u-0cY-TQnC", "MveBhC-g29c0dU-tCT-R6nC", "JTpxue-xSqAhGo-AZk-zB1t", "92TVdU-qDJesPN-0lb-JOd3", "0PODnh-IciBdOZ-0CS-oNeL", "KkkW6x-TiemXQw-OiH-dZ9s", "PIs5Aj-g02HRXw-957-GD2z", "yJIzuw-au6460e-0Tl-XYEJ", "KHvMCD-OQDL0eX-nqK-TmEt", "6QJJgV-Z3IZ1Rf-wyv-rIJ6", "qA9ycc-sR2qm6P-PtB-AIax", "uDeuEb-B0t0Ljr-dWk-jkC4", "5vPy52-ygN0MMH-UB4-nZQL", "zbbmrQ-pT3uAuU-Kae-HjM5", "3QShHS-7RwUB10-0W2-H4Qy", "PMc4QI-5lNajXU-f8m-RGIi", "O9t3dl-q8YHozj-saR-A3Jm", "k4eH3O-aHnTKY7-ADp-4Vsi", "RA4epe-lWWnOff-bpM-bSR4", "6ysu2R-gSc5dwU-cv0-LqCJ", "tVl3TY-o42NMVO-k3S-iqOY", "NMgTrr-W1RrCvP-Zaf-paL7", "d1CJmF-CeG5asM-xms-1dwN", "N1D30g-zFjiGzI-eHC-Sof4", 
"tOhfKu-Gdtf9Ne-KwA-JdHV", "XLzwK0-6ocGDrS-TtU-wlEI", "XDgZfb-Sxc45Zn-mVO-S2QO", "GQD7a0-fnt9BZs-Kvh-dPbJ", "9dJxj9-HFwEQMY-6p9-s8Vt", "1qU9pA-QJGAna9-JoG-H7GS", "rKIkxA-UnGWYSn-0li-ziuB", "tbPazx-IjUrQ8J-NZe-VOPL", "xBpSIv-U6ojkK7-9p5-LviD", "88bnWI-pxrKa7T-n2d-tXk9", "0XviXp-9ksT8s0-fDy-35SW", "e0XauA-GNRALmd-SM2-Y4Gf", "kyvYBk-Bk5M4Xq-gxX-kE1B", "dIiQzS-5sT4ogL-6IV-tLmb", "OlGOyH-dyL1nzj-B2M-z8ir", "zC9Gtn-x8hpfPD-KOu-k31W", "qSq3z2-Lpv0YcB-hBq-Sabd", "LSyNyi-tBZUx1l-hAj-mwsx", "2c9aTP-hXloMK7-ufH-dgq6", "aXksHO-zARQxfo-sgS-8Bf4", "ioOXAL-eVUF0W8-vZx-ZeYX", "DXUkAP-A7SqnHj-V4U-PJfz", "cnzZXk-AOMepfN-hym-qbDH", "CMlAd6-8FF1yXs-fae-Izfv", "qiXnUv-e2PsJWm-tLF-KpjE", "Gfx3k9-JvXa7Wd-rI1-1e1E"]
-
--- !map_values --
-[0.9805502029231666, 0.5330291595754054, 0.3002474487337981, 0.4856360175030267, 0.7687106425158624, 0.6993506644925102, 0.2849354808825807, 0.3473417455186141, 0.1350012944304507, 0.9708132103700939, 0.1858304263994345, 0.4886337264552073, 0.3635474169515766, 0.5640845268971175, 0.1374134087807577, 0.7766547647451623, 0.5835323296668318, 0.3654459547110349, 0.5479776709993764, 0.8379932542117192, 0.1566504627835081, 0.03371222042250388, 0.1699781825927229, 0.3579630495075078, 0.02809253185597727, 0.7204247029840027, 0.2760499256423206, 0.676890893219096, 0.03529878656700025, 0.02276578351027858, 0.09794991730625469, 0.5278062884613351, 0.1370404181139102, 0.5440352476580856, 0.7205540629419929, 0.1350852984195943, 0.4160946400431862, 0.2972295454562929, 0.9217426503585693, 0.58103998733474, 0.8845427436377473, 0.1017928267299423, 0.9547186973943892, 0.1680102784708342, 0.0008487745421986714, 0.1695241541106989, 0.6783921749433292, 0.7193818386971084, 0.930443435029246, 0.4846665469390518, 0.9924998940864419, 0.7238288481079148, 0.7053563817759009, 0.9735160772776755, 0.7782499787869234, 0.7413304280548174, 0.7550983926033307, 0.8713660446322186, 0.9205209678792637, 0.3419724898972277, 0.3696806985755556, 0.03023259817152302, 0.02477452604862684, 0.9764129157525588, 0.5933057559470283, 0.7612511554831843, 0.378758227033635, 0.9312730459544121, 0.6712083507802412, 0.165080800084368, 0.2292866463959062, 0.3736665350268106, 0.2048064464080658, 0.08394355937496834, 0.8494979696731824, 0.4321556255662622, 0.3534668267198027, 0.8791700434102772, 0.2274527583015258, 0.04886968507359402, 0.7936598110174163, 0.5449717343415919, 0.7635939445968348, 0.08505586183986624, 0.3509115026589145, 0.9633191745238908, 0.3972533910389617, 0.4659759249919267, 0.1579051246328464, 0.7853565578107594, 0.9894919939745654, 0.9395365730655929, 0.202260767382666, 0.1619636856192768, 0.5105569529841616, 0.4531109229280732, 0.2579134268597084, 0.7962109089915747, 0.2772969229539421, 
0.9315902037607061]
-
--- !map_contains_key --
-1077	[0.7805560995873845, 0.9303489002269559, 0.2529522997521877, 0.662270811026298, 0.664725297532439, 0.1019441091764477, 0.9614059300688174, 0.5278126009983843, 0.5287505841216708, 0.426116738236779, 0.4230050239387118, 0.5327026330053651, 0.6025481777942603, 0.2710733647257627, 0.613792118138183, 0.002100302783562991, 0.3200675048728582, 0.5485611014660204, 0.5121510581313707, 0.5145136652805358]	{"9wXr9n-TBm9Wyt-r8H-SkAq":0.9338329010480995, "CPDH4G-ZXGPkku-3wY-ktaQ":0.4355256963350881, "RvNlMt-HHjHN5M-VjP-xHAI":0.3263474611804782, "qKIhKy-Ws344os-haX-2pmT":0.565450203625137, "DOJJ5l-UEkwVMs-x9F-HifD":0.09375622010822238, "m871g8-1eFi7jt-oBq-S0yc":0.8819687247951038, "wXugVP-v2fc6IF-DeU-On3T":0.3448233486447311, "B0mXFX-QvgUgo7-Dih-6rDu":0.1914040395475467, "E9zv3F-xMqSbMa-il4-FuDg":0.3857021891084336, "msuFIN-ZkKO8TY-tu4-veH0":0.6646172653074628, "0rSUyl-Un07aIW-KAx-WHnX":0.3558009910430974, "XvbmO8-WA6oAqc-ihc-s8IL":0.4058206434411423, "G6B6RD-AicAlZb-16u-Pn1I":0.7203554946895749, "coDK0Q-tMg1294-JMQ-ZWQu":0.8236328627743186, "4c0aWh-yhL6BOX-rRu-1n0r":0.1398091184230428, "G4iUcG-ZhWw62v-VLt-n6lH":0.1838288978254214, "IIB7qD-WQistwT-Vux-0c9B":0.9174389144309458, "7cTyuR-5ssXm2S-sJR-JTIZ":0.8132237242672837, "3KPhSW-FICEImf-bba-PCiQ":0.6302643579943553, "qQ7Yup-XBeQGFz-3EP-q0vd":0.6109025726752364, "gjRxRo-Af9Oqx5-IzN-3B9d":0.9251468490326916, "1zSj57-nNZpZ0b-ZKn-BeY0":0.5628463109107144, "sTK0mn-wkp1Xp5-PRS-txVM":0.7905808129559996, "sLrM0s-1KnXLb6-1A3-Z1vJ":0.4234598677670157, "UkYdkP-k7YKiKS-Fxp-qAcI":0.7541401266679869, "v8p0YV-R5pAKZ8-UMr-P1bQ":0.2931152565110683, "RJdTav-jk3os9Z-yRk-WhwV":0.5263811309738877, "lB91ic-pNFZkE4-hBx-e104":0.6692292834321788, "gmRV6e-GKJUg0L-ok7-J6Lz":0.05924766959664352, "o3LUyz-7Toh54O-czG-Xep8":0.6284193821127264, "8fzHhM-4otPAss-qTm-phg8":0.8953002441537012, "kZsHhe-vfClpAR-b3H-7aHl":0.1775015612747399, "TdZnlG-BUgMs7Z-iBM-9c3v":0.2749839439504633, "RipJXn-p4gZkyy-1ZY-xkWe":0.05461626895038973, 
"ke730M-LmMjGdc-EFy-0LUK":0.3078176183644828, "jBSExJ-GXTc5TB-NSa-xBEd":0.6617827850054024, "kI7Cc8-DSg5RdF-qLo-2bhe":0.9835707461323488, "bAn3VI-x6xXWpB-zWe-G5CJ":0.2179821229979456, "jAil30-kbt6K6z-kbr-8foB":0.9788066977245138, "IHIwNs-1QGqy8l-i8i-vu4G":0.4967939741245059, "p0IbZr-tHCtwiV-0hq-NtIt":0.05018379510905702, "iggdij-M3YNBpd-yiD-a8Ro":0.982385582884686, "BrJEww-C4LpgaS-AeB-So4U":0.9024855415553655, "xnO3Fi-8rXcpgj-zpm-EmuX":0.2052911881746857, "5w57da-phYtDUx-px2-6frG":0.2969063879156597, "31MfFs-1WyUAr6-gQ0-xLxY":0.4879555128313509, "ryBl2p-rSoPhwd-WPv-NCAU":0.7954485484495807, "KN5TEt-gOfJ4Hy-3pp-HiBa":0.1533389643648807, "ytqxb8-utXXjUf-m41-i6ir":0.6150208673719357, "WhGUGz-zzyvEpD-9BM-2bVf":0.581040090228354, "dE1tFe-zHClt4u-0cY-TQnC":0.7608999632369456, "MveBhC-g29c0dU-tCT-R6nC":0.3345734028221851, "JTpxue-xSqAhGo-AZk-zB1t":0.3504030277488054, "92TVdU-qDJesPN-0lb-JOd3":0.7387694998319805, "0PODnh-IciBdOZ-0CS-oNeL":0.9515905965769644, "KkkW6x-TiemXQw-OiH-dZ9s":0.4082412331999081, "PIs5Aj-g02HRXw-957-GD2z":0.641526116451016, "yJIzuw-au6460e-0Tl-XYEJ":0.7521928530356236, "KHvMCD-OQDL0eX-nqK-TmEt":0.1309616727896826, "6QJJgV-Z3IZ1Rf-wyv-rIJ6":0.7007110387725962, "qA9ycc-sR2qm6P-PtB-AIax":0.4462977655645909, "uDeuEb-B0t0Ljr-dWk-jkC4":0.6904672767407958, "5vPy52-ygN0MMH-UB4-nZQL":0.6057596542200021, "zbbmrQ-pT3uAuU-Kae-HjM5":0.9812657498686279, "3QShHS-7RwUB10-0W2-H4Qy":0.4155760848860853, "PMc4QI-5lNajXU-f8m-RGIi":0.7046420976800288, "O9t3dl-q8YHozj-saR-A3Jm":0.8543344954196586, "k4eH3O-aHnTKY7-ADp-4Vsi":0.2655832454718557, "RA4epe-lWWnOff-bpM-bSR4":0.7523252210222883, "6ysu2R-gSc5dwU-cv0-LqCJ":0.7830899322716732, "tVl3TY-o42NMVO-k3S-iqOY":0.7923823401215799, "NMgTrr-W1RrCvP-Zaf-paL7":0.4686928654756936, "d1CJmF-CeG5asM-xms-1dwN":0.7622908781076493, "N1D30g-zFjiGzI-eHC-Sof4":0.847542878440137, "tOhfKu-Gdtf9Ne-KwA-JdHV":0.4999285217445154, "XLzwK0-6ocGDrS-TtU-wlEI":0.3985354402705095, "XDgZfb-Sxc45Zn-mVO-S2QO":0.05791580337644187, 
"GQD7a0-fnt9BZs-Kvh-dPbJ":0.663903859916476, "9dJxj9-HFwEQMY-6p9-s8Vt":0.2194407595305434, "1qU9pA-QJGAna9-JoG-H7GS":0.8877401947295382, "rKIkxA-UnGWYSn-0li-ziuB":0.1607906275036466, "tbPazx-IjUrQ8J-NZe-VOPL":0.6809166916797593, "xBpSIv-U6ojkK7-9p5-LviD":0.1195672647379901, "88bnWI-pxrKa7T-n2d-tXk9":0.1956068951787721, "0XviXp-9ksT8s0-fDy-35SW":0.8690659418822626, "e0XauA-GNRALmd-SM2-Y4Gf":0.6840816888752089, "kyvYBk-Bk5M4Xq-gxX-kE1B":0.7744771682336401, "dIiQzS-5sT4ogL-6IV-tLmb":0.0340772833497166, "OlGOyH-dyL1nzj-B2M-z8ir":0.3765608037933722, "zC9Gtn-x8hpfPD-KOu-k31W":0.864392047887076, "qSq3z2-Lpv0YcB-hBq-Sabd":0.1542847609246678, "LSyNyi-tBZUx1l-hAj-mwsx":0.304034328298701, "2c9aTP-hXloMK7-ufH-dgq6":0.1016852552953107, "aXksHO-zARQxfo-sgS-8Bf4":0.5490533082019959, "ioOXAL-eVUF0W8-vZx-ZeYX":0.4528164038481785, "DXUkAP-A7SqnHj-V4U-PJfz":0.3607407447425939, "cnzZXk-AOMepfN-hym-qbDH":0.4587361500592568, "CMlAd6-8FF1yXs-fae-Izfv":0.07555019720825917, "qiXnUv-e2PsJWm-tLF-KpjE":0.9409681065363688, "Gfx3k9-JvXa7Wd-rI1-1e1E":0.7492793312178226}	{"name":"r8HXXQM4XHoI", "age":238221053, "tip":"2023-07-26 15:40:37.694000"}
-
--- !array_max --
-11028
-
--- !null_element_at_orc --
-0
-
--- !map_key_select_orc --
-38111	0.770169659057425
-
--- !map_keys_orc --
-["9wXr9n-TBm9Wyt-r8H-SkAq", "CPDH4G-ZXGPkku-3wY-ktaQ", "RvNlMt-HHjHN5M-VjP-xHAI", "qKIhKy-Ws344os-haX-2pmT", "DOJJ5l-UEkwVMs-x9F-HifD", "m871g8-1eFi7jt-oBq-S0yc", "wXugVP-v2fc6IF-DeU-On3T", "B0mXFX-QvgUgo7-Dih-6rDu", "E9zv3F-xMqSbMa-il4-FuDg", "msuFIN-ZkKO8TY-tu4-veH0", "0rSUyl-Un07aIW-KAx-WHnX", "XvbmO8-WA6oAqc-ihc-s8IL", "G6B6RD-AicAlZb-16u-Pn1I", "coDK0Q-tMg1294-JMQ-ZWQu", "4c0aWh-yhL6BOX-rRu-1n0r", "G4iUcG-ZhWw62v-VLt-n6lH", "IIB7qD-WQistwT-Vux-0c9B", "7cTyuR-5ssXm2S-sJR-JTIZ", "3KPhSW-FICEImf-bba-PCiQ", "qQ7Yup-XBeQGFz-3EP-q0vd", "gjRxRo-Af9Oqx5-IzN-3B9d", "1zSj57-nNZpZ0b-ZKn-BeY0", "sTK0mn-wkp1Xp5-PRS-txVM", "sLrM0s-1KnXLb6-1A3-Z1vJ", "UkYdkP-k7YKiKS-Fxp-qAcI", "v8p0YV-R5pAKZ8-UMr-P1bQ", "RJdTav-jk3os9Z-yRk-WhwV", "lB91ic-pNFZkE4-hBx-e104", "gmRV6e-GKJUg0L-ok7-J6Lz", "o3LUyz-7Toh54O-czG-Xep8", "8fzHhM-4otPAss-qTm-phg8", "kZsHhe-vfClpAR-b3H-7aHl", "TdZnlG-BUgMs7Z-iBM-9c3v", "RipJXn-p4gZkyy-1ZY-xkWe", "ke730M-LmMjGdc-EFy-0LUK", "jBSExJ-GXTc5TB-NSa-xBEd", "kI7Cc8-DSg5RdF-qLo-2bhe", "bAn3VI-x6xXWpB-zWe-G5CJ", "jAil30-kbt6K6z-kbr-8foB", "IHIwNs-1QGqy8l-i8i-vu4G", "p0IbZr-tHCtwiV-0hq-NtIt", "iggdij-M3YNBpd-yiD-a8Ro", "BrJEww-C4LpgaS-AeB-So4U", "xnO3Fi-8rXcpgj-zpm-EmuX", "5w57da-phYtDUx-px2-6frG", "31MfFs-1WyUAr6-gQ0-xLxY", "ryBl2p-rSoPhwd-WPv-NCAU", "KN5TEt-gOfJ4Hy-3pp-HiBa", "ytqxb8-utXXjUf-m41-i6ir", "WhGUGz-zzyvEpD-9BM-2bVf", "dE1tFe-zHClt4u-0cY-TQnC", "MveBhC-g29c0dU-tCT-R6nC", "JTpxue-xSqAhGo-AZk-zB1t", "92TVdU-qDJesPN-0lb-JOd3", "0PODnh-IciBdOZ-0CS-oNeL", "KkkW6x-TiemXQw-OiH-dZ9s", "PIs5Aj-g02HRXw-957-GD2z", "yJIzuw-au6460e-0Tl-XYEJ", "KHvMCD-OQDL0eX-nqK-TmEt", "6QJJgV-Z3IZ1Rf-wyv-rIJ6", "qA9ycc-sR2qm6P-PtB-AIax", "uDeuEb-B0t0Ljr-dWk-jkC4", "5vPy52-ygN0MMH-UB4-nZQL", "zbbmrQ-pT3uAuU-Kae-HjM5", "3QShHS-7RwUB10-0W2-H4Qy", "PMc4QI-5lNajXU-f8m-RGIi", "O9t3dl-q8YHozj-saR-A3Jm", "k4eH3O-aHnTKY7-ADp-4Vsi", "RA4epe-lWWnOff-bpM-bSR4", "6ysu2R-gSc5dwU-cv0-LqCJ", "tVl3TY-o42NMVO-k3S-iqOY", "NMgTrr-W1RrCvP-Zaf-paL7", "d1CJmF-CeG5asM-xms-1dwN", "N1D30g-zFjiGzI-eHC-Sof4", 
"tOhfKu-Gdtf9Ne-KwA-JdHV", "XLzwK0-6ocGDrS-TtU-wlEI", "XDgZfb-Sxc45Zn-mVO-S2QO", "GQD7a0-fnt9BZs-Kvh-dPbJ", "9dJxj9-HFwEQMY-6p9-s8Vt", "1qU9pA-QJGAna9-JoG-H7GS", "rKIkxA-UnGWYSn-0li-ziuB", "tbPazx-IjUrQ8J-NZe-VOPL", "xBpSIv-U6ojkK7-9p5-LviD", "88bnWI-pxrKa7T-n2d-tXk9", "0XviXp-9ksT8s0-fDy-35SW", "e0XauA-GNRALmd-SM2-Y4Gf", "kyvYBk-Bk5M4Xq-gxX-kE1B", "dIiQzS-5sT4ogL-6IV-tLmb", "OlGOyH-dyL1nzj-B2M-z8ir", "zC9Gtn-x8hpfPD-KOu-k31W", "qSq3z2-Lpv0YcB-hBq-Sabd", "LSyNyi-tBZUx1l-hAj-mwsx", "2c9aTP-hXloMK7-ufH-dgq6", "aXksHO-zARQxfo-sgS-8Bf4", "ioOXAL-eVUF0W8-vZx-ZeYX", "DXUkAP-A7SqnHj-V4U-PJfz", "cnzZXk-AOMepfN-hym-qbDH", "CMlAd6-8FF1yXs-fae-Izfv", "qiXnUv-e2PsJWm-tLF-KpjE", "Gfx3k9-JvXa7Wd-rI1-1e1E"]
-
--- !map_values_orc --
-[0.9805502029231666, 0.5330291595754054, 0.3002474487337981, 0.4856360175030267, 0.7687106425158624, 0.6993506644925102, 0.2849354808825807, 0.3473417455186141, 0.1350012944304507, 0.9708132103700939, 0.1858304263994345, 0.4886337264552073, 0.3635474169515766, 0.5640845268971175, 0.1374134087807577, 0.7766547647451623, 0.5835323296668318, 0.3654459547110349, 0.5479776709993764, 0.8379932542117192, 0.1566504627835081, 0.03371222042250388, 0.1699781825927229, 0.3579630495075078, 0.02809253185597727, 0.7204247029840027, 0.2760499256423206, 0.676890893219096, 0.03529878656700025, 0.02276578351027858, 0.09794991730625469, 0.5278062884613351, 0.1370404181139102, 0.5440352476580856, 0.7205540629419929, 0.1350852984195943, 0.4160946400431862, 0.2972295454562929, 0.9217426503585693, 0.58103998733474, 0.8845427436377473, 0.1017928267299423, 0.9547186973943892, 0.1680102784708342, 0.0008487745421986714, 0.1695241541106989, 0.6783921749433292, 0.7193818386971084, 0.930443435029246, 0.4846665469390518, 0.9924998940864419, 0.7238288481079148, 0.7053563817759009, 0.9735160772776755, 0.7782499787869234, 0.7413304280548174, 0.7550983926033307, 0.8713660446322186, 0.9205209678792637, 0.3419724898972277, 0.3696806985755556, 0.03023259817152302, 0.02477452604862684, 0.9764129157525588, 0.5933057559470283, 0.7612511554831843, 0.378758227033635, 0.9312730459544121, 0.6712083507802412, 0.165080800084368, 0.2292866463959062, 0.3736665350268106, 0.2048064464080658, 0.08394355937496834, 0.8494979696731824, 0.4321556255662622, 0.3534668267198027, 0.8791700434102772, 0.2274527583015258, 0.04886968507359402, 0.7936598110174163, 0.5449717343415919, 0.7635939445968348, 0.08505586183986624, 0.3509115026589145, 0.9633191745238908, 0.3972533910389617, 0.4659759249919267, 0.1579051246328464, 0.7853565578107594, 0.9894919939745654, 0.9395365730655929, 0.202260767382666, 0.1619636856192768, 0.5105569529841616, 0.4531109229280732, 0.2579134268597084, 0.7962109089915747, 0.2772969229539421, 
0.9315902037607061]
-
--- !map_contains_key_orc --
-1077	[0.7805560995873845, 0.9303489002269559, 0.2529522997521877, 0.662270811026298, 0.664725297532439, 0.1019441091764477, 0.9614059300688174, 0.5278126009983843, 0.5287505841216708, 0.426116738236779, 0.4230050239387118, 0.5327026330053651, 0.6025481777942603, 0.2710733647257627, 0.613792118138183, 0.002100302783562991, 0.3200675048728582, 0.5485611014660204, 0.5121510581313707, 0.5145136652805358]	{"9wXr9n-TBm9Wyt-r8H-SkAq":0.9338329010480995, "CPDH4G-ZXGPkku-3wY-ktaQ":0.4355256963350881, "RvNlMt-HHjHN5M-VjP-xHAI":0.3263474611804782, "qKIhKy-Ws344os-haX-2pmT":0.565450203625137, "DOJJ5l-UEkwVMs-x9F-HifD":0.09375622010822238, "m871g8-1eFi7jt-oBq-S0yc":0.8819687247951038, "wXugVP-v2fc6IF-DeU-On3T":0.3448233486447311, "B0mXFX-QvgUgo7-Dih-6rDu":0.1914040395475467, "E9zv3F-xMqSbMa-il4-FuDg":0.3857021891084336, "msuFIN-ZkKO8TY-tu4-veH0":0.6646172653074628, "0rSUyl-Un07aIW-KAx-WHnX":0.3558009910430974, "XvbmO8-WA6oAqc-ihc-s8IL":0.4058206434411423, "G6B6RD-AicAlZb-16u-Pn1I":0.7203554946895749, "coDK0Q-tMg1294-JMQ-ZWQu":0.8236328627743186, "4c0aWh-yhL6BOX-rRu-1n0r":0.1398091184230428, "G4iUcG-ZhWw62v-VLt-n6lH":0.1838288978254214, "IIB7qD-WQistwT-Vux-0c9B":0.9174389144309458, "7cTyuR-5ssXm2S-sJR-JTIZ":0.8132237242672837, "3KPhSW-FICEImf-bba-PCiQ":0.6302643579943553, "qQ7Yup-XBeQGFz-3EP-q0vd":0.6109025726752364, "gjRxRo-Af9Oqx5-IzN-3B9d":0.9251468490326916, "1zSj57-nNZpZ0b-ZKn-BeY0":0.5628463109107144, "sTK0mn-wkp1Xp5-PRS-txVM":0.7905808129559996, "sLrM0s-1KnXLb6-1A3-Z1vJ":0.4234598677670157, "UkYdkP-k7YKiKS-Fxp-qAcI":0.7541401266679869, "v8p0YV-R5pAKZ8-UMr-P1bQ":0.2931152565110683, "RJdTav-jk3os9Z-yRk-WhwV":0.5263811309738877, "lB91ic-pNFZkE4-hBx-e104":0.6692292834321788, "gmRV6e-GKJUg0L-ok7-J6Lz":0.05924766959664352, "o3LUyz-7Toh54O-czG-Xep8":0.6284193821127264, "8fzHhM-4otPAss-qTm-phg8":0.8953002441537012, "kZsHhe-vfClpAR-b3H-7aHl":0.1775015612747399, "TdZnlG-BUgMs7Z-iBM-9c3v":0.2749839439504633, "RipJXn-p4gZkyy-1ZY-xkWe":0.05461626895038973, 
"ke730M-LmMjGdc-EFy-0LUK":0.3078176183644828, "jBSExJ-GXTc5TB-NSa-xBEd":0.6617827850054024, "kI7Cc8-DSg5RdF-qLo-2bhe":0.9835707461323488, "bAn3VI-x6xXWpB-zWe-G5CJ":0.2179821229979456, "jAil30-kbt6K6z-kbr-8foB":0.9788066977245138, "IHIwNs-1QGqy8l-i8i-vu4G":0.4967939741245059, "p0IbZr-tHCtwiV-0hq-NtIt":0.05018379510905702, "iggdij-M3YNBpd-yiD-a8Ro":0.982385582884686, "BrJEww-C4LpgaS-AeB-So4U":0.9024855415553655, "xnO3Fi-8rXcpgj-zpm-EmuX":0.2052911881746857, "5w57da-phYtDUx-px2-6frG":0.2969063879156597, "31MfFs-1WyUAr6-gQ0-xLxY":0.4879555128313509, "ryBl2p-rSoPhwd-WPv-NCAU":0.7954485484495807, "KN5TEt-gOfJ4Hy-3pp-HiBa":0.1533389643648807, "ytqxb8-utXXjUf-m41-i6ir":0.6150208673719357, "WhGUGz-zzyvEpD-9BM-2bVf":0.581040090228354, "dE1tFe-zHClt4u-0cY-TQnC":0.7608999632369456, "MveBhC-g29c0dU-tCT-R6nC":0.3345734028221851, "JTpxue-xSqAhGo-AZk-zB1t":0.3504030277488054, "92TVdU-qDJesPN-0lb-JOd3":0.7387694998319805, "0PODnh-IciBdOZ-0CS-oNeL":0.9515905965769644, "KkkW6x-TiemXQw-OiH-dZ9s":0.4082412331999081, "PIs5Aj-g02HRXw-957-GD2z":0.641526116451016, "yJIzuw-au6460e-0Tl-XYEJ":0.7521928530356236, "KHvMCD-OQDL0eX-nqK-TmEt":0.1309616727896826, "6QJJgV-Z3IZ1Rf-wyv-rIJ6":0.7007110387725962, "qA9ycc-sR2qm6P-PtB-AIax":0.4462977655645909, "uDeuEb-B0t0Ljr-dWk-jkC4":0.6904672767407958, "5vPy52-ygN0MMH-UB4-nZQL":0.6057596542200021, "zbbmrQ-pT3uAuU-Kae-HjM5":0.9812657498686279, "3QShHS-7RwUB10-0W2-H4Qy":0.4155760848860853, "PMc4QI-5lNajXU-f8m-RGIi":0.7046420976800288, "O9t3dl-q8YHozj-saR-A3Jm":0.8543344954196586, "k4eH3O-aHnTKY7-ADp-4Vsi":0.2655832454718557, "RA4epe-lWWnOff-bpM-bSR4":0.7523252210222883, "6ysu2R-gSc5dwU-cv0-LqCJ":0.7830899322716732, "tVl3TY-o42NMVO-k3S-iqOY":0.7923823401215799, "NMgTrr-W1RrCvP-Zaf-paL7":0.4686928654756936, "d1CJmF-CeG5asM-xms-1dwN":0.7622908781076493, "N1D30g-zFjiGzI-eHC-Sof4":0.847542878440137, "tOhfKu-Gdtf9Ne-KwA-JdHV":0.4999285217445154, "XLzwK0-6ocGDrS-TtU-wlEI":0.3985354402705095, "XDgZfb-Sxc45Zn-mVO-S2QO":0.05791580337644187, 
"GQD7a0-fnt9BZs-Kvh-dPbJ":0.663903859916476, "9dJxj9-HFwEQMY-6p9-s8Vt":0.2194407595305434, "1qU9pA-QJGAna9-JoG-H7GS":0.8877401947295382, "rKIkxA-UnGWYSn-0li-ziuB":0.1607906275036466, "tbPazx-IjUrQ8J-NZe-VOPL":0.6809166916797593, "xBpSIv-U6ojkK7-9p5-LviD":0.1195672647379901, "88bnWI-pxrKa7T-n2d-tXk9":0.1956068951787721, "0XviXp-9ksT8s0-fDy-35SW":0.8690659418822626, "e0XauA-GNRALmd-SM2-Y4Gf":0.6840816888752089, "kyvYBk-Bk5M4Xq-gxX-kE1B":0.7744771682336401, "dIiQzS-5sT4ogL-6IV-tLmb":0.0340772833497166, "OlGOyH-dyL1nzj-B2M-z8ir":0.3765608037933722, "zC9Gtn-x8hpfPD-KOu-k31W":0.864392047887076, "qSq3z2-Lpv0YcB-hBq-Sabd":0.1542847609246678, "LSyNyi-tBZUx1l-hAj-mwsx":0.304034328298701, "2c9aTP-hXloMK7-ufH-dgq6":0.1016852552953107, "aXksHO-zARQxfo-sgS-8Bf4":0.5490533082019959, "ioOXAL-eVUF0W8-vZx-ZeYX":0.4528164038481785, "DXUkAP-A7SqnHj-V4U-PJfz":0.3607407447425939, "cnzZXk-AOMepfN-hym-qbDH":0.4587361500592568, "CMlAd6-8FF1yXs-fae-Izfv":0.07555019720825917, "qiXnUv-e2PsJWm-tLF-KpjE":0.9409681065363688, "Gfx3k9-JvXa7Wd-rI1-1e1E":0.7492793312178226}	{"name":"r8HXXQM4XHoI", "age":238221053, "tip":"2023-07-26 15:40:37.694000"}
-
--- !array_max_orc --
-11028
-
--- !offsets_check --
-0	[1, 2]	[[], [3], null]	{"a":1, "b":2}	{"s1":"e", "s2":null}
-1	[]	[]	{}	\N
-2	\N	\N	\N	{"s1":"h", "s2":10}
-3	[5, null]	[[6, 7], [8, null], null]	{"f":1, "g":null}	{"s1":null, "s2":9}
-
--- !map_with_nullable_key --
-\N	\N	\N	\N	\N	\N	\N	\N	\N		test		test	aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa	test	3	4	5.1	6.2	true	false	-1.2	12.30	-1234.5678	123456789.12340000	-1234567890.12345678	1234567890123456789012.1234567800000000	test2	{"test":"test"}	{"test":"test"}	{"test":"test"}	{3:3}	{4:4}	{5:5}	{6:6}	{1:1}	{-1.2:-1.2}	{12.30:12.30}	{-1234.5678:-1234.5678}	{123456789.12340000:123456789.12340000}	{-1234567890.12345678:-1234567890.12345678}	
{1234567890123456789012.1234567800000000:1234567890123456789012.1234567800000000}	["test"]	[3]	[4]	[5]	[6]	[1]	["test"]	["test"]	[-1.2]	[12.30]	[-1234.5678]	[123456789.12340000]	[-1234567890.12345678]	[1234567890123456789012.1234567800000000]	{"s_bigint":1}	{"test":[{"s_int":1}]}	{"struct_field":["1", "2", "3"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":null, "struct_non_nulls_after_nulls2":"some string"}	{"struct_field1":null, "struct_field2":"some string", "strict_field3":{"nested_struct_field1":null, "nested_struct_field2":"nested_string2"}}	{"k1":"v1", "k2":null, "k3":"v3"}	[null, "test"]	["test-1", null, "test-2"]	["test", null]	[null, null, null]
-
--- !date_dict --
-2036-12-28	1898-12-28	2539-12-28
-
diff --git a/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out b/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out
index 8a104343fc4e10..57367a3bf0999c 100644
--- a/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out
+++ b/regression-test/data/external_table_p0/hive/test_external_catalog_hive.out
@@ -123,127 +123,3 @@ a126	15
 2017-09-13	2009-09-21T04:23:14.309124
 2024-03-23	2024-02-01T21:11:09.170
 
--- !q01 --
-zhangsan	1
-lisi	1
-
--- !q02 --
-1	1
-2	1
-3	1
-4	1
-
--- !q03 --
-123	china	4	56	sc
-234	america	5	67	ls
-345	cana	4	56	fy
-567	fre	7	89	pa
-
--- !q04 --
-p_partkey2	p_name2	p_mfgr2	p_brand2	p_type2	p_size2	p_con2	p_r_price2	p_comment2
-p_partkey1	p_name1	p_mfgr1	p_brand1	p_type1	p_size1	p_con1	p_r_price1	p_comment1
-p_partkey0	p_name0	p_mfgr0	p_brand0	p_type0	p_size0	p_con0	p_r_price0	p_comment0
-
--- !q05 --
-batchno	appsheet_no	filedate	t_no	tano	t_name	chged_no	mob_no2	home_no	off_no
-off_no	home_no	mob_no2	chged_no	t_name	tano	t_no	filedate	appsheet_no	batchno
-
--- !q06 --
-bill_code	dates	ord_year	ord_month	ord_quarter	on_time
-
--- !q07 --
-2
-
--- !q08 --
-123	zhangsan	12	123.45	2022-01-01
-124	lisi	12	123.45	2022-01-01
-125	lisan	12	123.45	2022-01-02
-
--- !q09 --
-a123	12
-a124	13
-a125	14
-a126	15
-
--- !par_fields_in_file_orc1 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet1 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc2 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet2 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc3 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet3 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc4 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet4 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc5 --
-
--- !par_fields_in_file_parquet5 --
-
--- !par_fields_in_file_orc1 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet1 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc2 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet2 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc3 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet3 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc4 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_parquet4 --
-1	Alice	100.0	2023	8
-2	Bob	150.0	2023	8
-
--- !par_fields_in_file_orc5 --
-
--- !par_fields_in_file_parquet5 --
-
--- !parquet_adjusted_utc --
-1997-09-21	1999-01-12T15:12:31.235784
-1998-01-12	1993-06-11T11:33:12.356500
-2002-09-29	2001-01-17T21:23:42.120
-2008-08-07	2023-09-23T11:12:17.458
-2009-11-13	2011-11-12T01:23:06.986
-2012-07-08	2023-11-09T20:21:16.321
-2017-09-13	2009-09-21T04:23:14.309124
-2024-03-23	2024-02-01T21:11:09.170
-
diff --git a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
index 0402feef40e6b5..deda902d300d43 100644
--- a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
+++ b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
@@ -119,123 +119,3 @@
 -- !q06 --
 2023-01-03T00:00	100	0.3	test3
 
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
--- !q01 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-0.3	test3	2023-01-03T00:00	100
-
--- !q02 --
-0.1	test1	2023-01-01T00:00	\N
-0.2	test2	2023-01-02T00:00	\N
-
--- !q03 --
-0.3	test3	2023-01-03T00:00	100
-
--- !q04 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-2023-01-03T00:00	100	0.3	test3
-
--- !q05 --
-2023-01-01T00:00	\N	0.1	test1
-2023-01-02T00:00	\N	0.2	test2
-
--- !q06 --
-2023-01-03T00:00	100	0.3	test3
-
diff --git a/regression-test/data/external_table_p0/hive/test_hive_compress_type.out b/regression-test/data/external_table_p0/hive/test_hive_compress_type.out
index ca9ca885c5b854..ee4c9a8f2731ba 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_compress_type.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_compress_type.out
@@ -1,486 +1,440 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
--- !q21 --
-600005
-
--- !q22 --
-1510010
-
--- !q23 --
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	2023-08-21
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	bzip2
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	bzip2
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	deflate
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	deflate
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	gzip
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	gzip
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	lz4
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	plain
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	plain
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	snappy
-
--- !q31 --
-600005
-
--- !q32 --
-1510010
-
--- !q33 --
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	2023-08-21
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	bzip2
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	bzip2
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	deflate
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	deflate
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	gzip
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	gzip
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	lz4
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	mix
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	plain
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	plain
-4611870011201662970	0	HD Tube 5* 	1	2014-03-22T05:11:29	2014-03-22	598875	4243808759	92f6fe1be9b9773206d6b63e50feb470                  	196	2314158381335918424	0	3	3	http://public_search		yandex.ru.livemaster		0	0	[]	[4,15,333,3912,14512,12818]	[18,348,1010]	[]	1846	952	29	10	1	0.77	0	0	24	73d7	1	1	0	0			3238011	0	0		0	0	1119	641	157	2014-03-22T19:51:48	0	0	0	0	utf-8	330	0	0	0	7774109565808082252	11274076	0	0	0	0	0	E 	2014-03-22T11:54:54	55	2	3	4	6	[105,11,9,88,45,14,98,72,3,925,2193,6,25,1]	3137666015	cc184643699dccab8d5d4af796c47449                  	-1	-1	-1	nD  	Tp  			0	-1	0	0	81	0	0	0	-1	-1	-1	-1	-1	-1	-1	-1	0		0		07d21f	0	[]											0	15284527577228392792	14270691585016129648	0	0				[]	[]	[]	[]	[]	\N	c1889e2b9ad1e219ed04c0e9624b5139        	1404	0	snappy
-
 -- !q42 --
 215
 
 -- !q43 --
 1	100	5	1000000000	10.5	20.75	true	First	A	Alpha	2023-10-06	2023-10-06T14:30	123.45
-1	578	55	2111222273	56.858597	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
-1	979	44	10163954251	28.827957	57.56879940298416	true	Random	Q	DNRGE	2023-12-09	2023-12-10T20:21:58	1581.25
+1	578	55	2111222273	56.8586	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
+1	979	44	10163954251	28.82796	57.56879940298416	true	Random	Q	DNRGE	2023-12-09	2023-12-10T20:21:58	1581.25
 10	1000	50	10000000000	55.25	65.75	false	Tenth	J	Kappa	2023-10-15	2023-10-15T23:30	1012.34
-10	210	26	8549838179	23.438345	73.36477128189287	true	Random	N	VVXIF	2023-11-24	2023-12-13T18:04:58	226.65
-10	386	51	1214815770	13.959902	36.64197990482059	false	Random	J	ORLGI	2023-12-18	2023-11-27T17:13:58	852.62
-10	966	38	2203748112	45.555325	27.908447208440094	true	Random	W	LFAGO	2023-12-14	2023-11-26T20:00:58	1898.68
+10	210	26	8549838179	23.43834	73.36477128189287	true	Random	N	VVXIF	2023-11-24	2023-12-13T18:04:58	226.65
+10	386	51	1214815770	13.9599	36.64197990482059	false	Random	J	ORLGI	2023-12-18	2023-11-27T17:13:58	852.62
+10	966	38	2203748112	45.55532	27.90844720844009	true	Random	W	LFAGO	2023-12-14	2023-11-26T20:00:58	1898.68
 100	281	26	3174393241	51.05278	52.09566669589555	false	Random	F	SLDWB	2023-12-14	2023-12-12T07:03:58	798.30
 100	289	71	4919981667	66.56684	69.73132704711037	true	Random	V	QOLAP	2023-12-17	2023-12-23T09:38:58	217.05
-11	1100	55	11000000000	60.5	70.0	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
-11	426	67	8473986652	17.942455	71.80682514420877	true	Random	X	FXDUV	2023-12-04	2023-12-22T07:51:58	129.81
-11	441	19	7370044350	74.261696	62.013817404758086	true	Random	D	UYKZA	2023-12-23	2023-12-15T11:49:58	1805.14
-11	487	27	14556302216	85.33334	62.596750833474495	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
-11	770	17	7962512669	12.508753	83.33847413902296	true	Random	P	LHJRA	2023-12-06	2023-12-04T15:48:58	970.51
+11	1100	55	11000000000	60.5	70	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
+11	426	67	8473986652	17.94246	71.80682514420877	true	Random	X	FXDUV	2023-12-04	2023-12-22T07:51:58	129.81
+11	441	19	7370044350	74.2617	62.01381740475809	true	Random	D	UYKZA	2023-12-23	2023-12-15T11:49:58	1805.14
+11	487	27	14556302216	85.33334	62.59675083347449	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
+11	770	17	7962512669	12.50875	83.33847413902296	true	Random	P	LHJRA	2023-12-06	2023-12-04T15:48:58	970.51
 12	1200	60	12000000000	65.75	75.25	false	Twelfth	L	Mu	2023-10-17	2023-10-17T02:15	1234.56
-12	751	8	12205294947	23.468674	64.35048302450815	true	Random	K	FCSBV	2023-12-03	2023-12-17T01:10:58	325.26
+12	751	8	12205294947	23.46867	64.35048302450815	true	Random	K	FCSBV	2023-12-03	2023-12-17T01:10:58	325.26
 12	782	48	5080583047	75.55138	49.6324463213595	true	Random	N	WYJDW	2023-12-16	2023-12-18T02:58:58	944.42
-12	987	73	1432735571	40.308147	43.5019559828596	true	Random	S	MZUNG	2023-12-07	2023-12-03T13:42:58	215.12
+12	987	73	1432735571	40.30815	43.5019559828596	true	Random	S	MZUNG	2023-12-07	2023-12-03T13:42:58	215.12
 13	1300	65	13000000000	70.0	80.5	true	Thirteenth	M	Nu	2023-10-18	2023-10-18T03:30	1345.67
-13	335	39	13869202091	30.426075	39.02304533093442	true	Random	L	AULCC	2023-12-08	2023-12-13T00:26:58	387.97
+13	335	39	13869202091	30.42607	39.02304533093442	true	Random	L	AULCC	2023-12-08	2023-12-13T00:26:58	387.97
 13	402	30	10851194313	74.82481	74.90108005771035	false	Random	F	GEMMK	2023-11-27	2023-12-21T15:03:58	1643.55
-13	503	34	6763884255	23.660393	63.9797872103468	true	Random	S	POEBK	2023-12-22	2023-12-23T23:16:58	486.62
-13	696	74	3370487489	84.544014	88.69976219408227	true	Random	H	RTFJI	2023-11-23	2023-11-25T07:32:58	1761.50
-13	745	48	13047949175	51.168613	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
-13	859	65	7433576046	56.136265	34.87823331022725	false	Random	L	CRFUF	2023-12-23	2023-12-12T15:05:58	1037.15
+13	503	34	6763884255	23.66039	63.9797872103468	true	Random	S	POEBK	2023-12-22	2023-12-23T23:16:58	486.62
+13	696	74	3370487489	84.54401	88.69976219408227	true	Random	H	RTFJI	2023-11-23	2023-11-25T07:32:58	1761.50
+13	745	48	13047949175	51.16861	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
+13	859	65	7433576046	56.13626	34.87823331022725	false	Random	L	CRFUF	2023-12-23	2023-12-12T15:05:58	1037.15
 14	1400	70	14000000000	75.25	85.75	false	Fourteenth	N	Xi	2023-10-19	2023-10-19T04:45	1456.78
-14	195	17	2370700139	16.777058	64.81793301410002	false	Random	P	IIGRE	2023-12-12	2023-12-14T22:40:58	1678.44
-14	966	65	7828602539	62.430664	68.85873133439297	true	Random	I	VVOQH	2023-12-01	2023-12-06T00:54:58	1300.43
-14	968	16	11314514196	62.509666	33.1841427251225	false	Random	T	WDEVJ	2023-11-24	2023-12-06T17:54:58	431.61
-15	1500	75	15000000000	80.5	90.0	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
+14	195	17	2370700139	16.77706	64.81793301410002	false	Random	P	IIGRE	2023-12-12	2023-12-14T22:40:58	1678.44
+14	966	65	7828602539	62.43066	68.85873133439297	true	Random	I	VVOQH	2023-12-01	2023-12-06T00:54:58	1300.43
+14	968	16	11314514196	62.50967	33.1841427251225	false	Random	T	WDEVJ	2023-11-24	2023-12-06T17:54:58	431.61
+15	1500	75	15000000000	80.5	90	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
 15	703	67	4284267079	85.38059	91.41088583496226	true	Random	T	PHZRC	2023-12-04	2023-12-08T15:54:58	185.19
-16	135	22	7901304568	43.944805	85.16901944253635	true	Random	K	NUQEP	2023-11-29	2023-11-25T23:42:58	1440.74
+16	135	22	7901304568	43.94481	85.16901944253635	true	Random	K	NUQEP	2023-11-29	2023-11-25T23:42:58	1440.74
 16	615	20	12294128025	77.37379	20.42772029677839	true	Random	U	JHPOB	2023-11-30	2023-12-16T14:29:58	1105.33
-17	289	49	13560709243	39.952793	38.245306832599425	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
-17	499	46	11230409207	51.632103	28.811164197154774	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
+17	289	49	13560709243	39.95279	38.24530683259943	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
+17	499	46	11230409207	51.6321	28.81116419715477	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
 17	646	62	11234805830	76.40492	67.46425239009778	true	Random	N	REHZC	2023-12-09	2023-11-28T02:06:58	365.15
 17	698	55	1807368797	20.17171	43.84496606184709	true	Random	P	SHSJV	2023-12-01	2023-11-25T11:56:58	810.95
 17	794	14	8377523030	28.07663	52.3837762020057	false	Random	E	WPMIN	2023-12-03	2023-11-26T04:59:58	239.42
 17	913	32	4647929554	78.91502	70.54487265463735	true	Random	S	WFPNS	2023-11-27	2023-11-26T03:29:58	321.45
-18	690	17	1399456103	63.261967	42.964715823771236	true	Random	R	BWSRS	2023-12-13	2023-12-23T08:33:58	1840.02
-18	835	17	14265814864	18.923101	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
-19	917	66	2340946367	89.035675	22.649362455875274	false	Random	D	HWHMU	2023-11-30	2023-12-10T02:36:58	1960.07
-19	993	13	7039833438	79.769066	69.79049291517285	true	Random	X	OFSUV	2023-12-11	2023-12-08T01:46:58	1958.95
+18	690	17	1399456103	63.26197	42.96471582377124	true	Random	R	BWSRS	2023-12-13	2023-12-23T08:33:58	1840.02
+18	835	17	14265814864	18.9231	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
+19	917	66	2340946367	89.03568	22.64936245587527	false	Random	D	HWHMU	2023-11-30	2023-12-10T02:36:58	1960.07
+19	993	13	7039833438	79.76907	69.79049291517285	true	Random	X	OFSUV	2023-12-11	2023-12-08T01:46:58	1958.95
 2	200	10	2000000000	15.75	25.5	false	Second	B	Beta	2023-10-07	2023-10-07T15:45	234.56
 2	850	75	7075823565	83.65178	62.56093886118189	false	Random	F	RFHAG	2023-11-24	2023-12-03T01:06:58	495.12
 2	921	62	8557914543	78.52379	58.6849882881372	false	Random	D	KBXXS	2023-12-07	2023-12-02T22:24:58	1782.88
-2	925	46	6013180177	41.107002	34.86561026061906	true	Random	L	XLLXY	2023-12-06	2023-12-09T14:04:58	1246.26
-20	248	64	7704906572	35.089928	76.69128821479936	true	Random	T	KQOMS	2023-11-30	2023-12-11T01:35:58	1799.26
-22	200	41	12163439252	64.621254	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
+2	925	46	6013180177	41.107	34.86561026061906	true	Random	L	XLLXY	2023-12-06	2023-12-09T14:04:58	1246.26
+20	248	64	7704906572	35.08993	76.69128821479936	true	Random	T	KQOMS	2023-11-30	2023-12-11T01:35:58	1799.26
+22	200	41	12163439252	64.62125	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
 22	235	19	6963606423	65.68033	54.1995295752517	true	Random	E	ENVRH	2023-12-22	2023-11-29T14:42:58	864.89
-23	192	8	5102667616	54.111057	40.85713971600841	false	Random	J	EBXEB	2023-12-13	2023-12-10T11:32:58	1824.12
-27	452	74	4240215371	50.569168	75.68204627611644	true	Random	G	AZOWU	2023-12-01	2023-11-26T06:24:58	201.31
+23	192	8	5102667616	54.11106	40.85713971600841	false	Random	J	EBXEB	2023-12-13	2023-12-10T11:32:58	1824.12
+27	452	74	4240215371	50.56917	75.68204627611644	true	Random	G	AZOWU	2023-12-01	2023-11-26T06:24:58	201.31
 27	866	24	5531365994	72.77447	86.96690821165853	false	Random	S	TZPFJ	2023-11-28	2023-12-13T15:31:58	1274.75
-28	655	21	14580233860	12.503378	48.60220286874443	false	Random	P	DUBQQ	2023-12-12	2023-12-03T20:11:58	922.42
-29	157	34	2302882987	51.924015	20.311140937696468	true	Random	R	MBOXJ	2023-12-02	2023-12-03T14:12:58	1620.80
-29	910	52	5544039917	22.179396	46.32732226806482	true	Random	C	TIZAG	2023-11-28	2023-12-14T16:08:58	900.96
-29	923	57	1591814253	68.57371	33.342802789892986	true	Random	Q	ZONGC	2023-12-20	2023-12-13T09:11:58	1465.38
-3	259	74	7422478791	22.291426	75.38227773520089	true	Random	S	VWAXJ	2023-12-01	2023-12-05T21:23:58	1970.57
-3	300	15	3000000000	20.25	30.0	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
-3	422	25	5996825874	89.173584	62.758513798505824	false	Random	Z	CDYAO	2023-12-14	2023-12-08T09:27:58	567.23
+28	655	21	14580233860	12.50338	48.60220286874443	false	Random	P	DUBQQ	2023-12-12	2023-12-03T20:11:58	922.42
+29	157	34	2302882987	51.92402	20.31114093769647	true	Random	R	MBOXJ	2023-12-02	2023-12-03T14:12:58	1620.80
+29	910	52	5544039917	22.1794	46.32732226806482	true	Random	C	TIZAG	2023-11-28	2023-12-14T16:08:58	900.96
+29	923	57	1591814253	68.57371	33.34280278989299	true	Random	Q	ZONGC	2023-12-20	2023-12-13T09:11:58	1465.38
+3	259	74	7422478791	22.29143	75.38227773520089	true	Random	S	VWAXJ	2023-12-01	2023-12-05T21:23:58	1970.57
+3	300	15	3000000000	20.25	30	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
+3	422	25	5996825874	89.17358	62.75851379850582	false	Random	Z	CDYAO	2023-12-14	2023-12-08T09:27:58	567.23
 3	668	60	1942550969	83.43451	87.15906153619602	true	Random	F	QYSRS	2023-12-22	2023-12-10T22:17:58	320.22
-30	292	71	10308444223	63.039078	76.40649540444898	false	Random	G	DRLHY	2023-12-19	2023-12-14T15:32:58	1165.14
-30	572	6	3022031043	57.813908	72.29244668177799	true	Random	X	EHJDN	2023-12-11	2023-12-12T02:44:58	910.38
-30	830	65	12624057029	38.791172	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
+30	292	71	10308444223	63.03908	76.40649540444898	false	Random	G	DRLHY	2023-12-19	2023-12-14T15:32:58	1165.14
+30	572	6	3022031043	57.81391	72.29244668177799	true	Random	X	EHJDN	2023-12-11	2023-12-12T02:44:58	910.38
+30	830	65	12624057029	38.79117	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
 31	395	22	6141426904	88.37914	52.0655270963123	false	Random	J	DRPJV	2023-12-07	2023-11-29T03:15:58	1076.41
-31	990	5	13678786851	15.762894	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
+31	990	5	13678786851	15.76289	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
 33	198	20	13225406950	67.7327	58.63863378877107	true	Random	I	ZKXRA	2023-12-07	2023-11-29T02:33:58	566.76
 33	321	39	12537851805	38.26871	32.6626492245712	true	Random	S	OICCE	2023-12-02	2023-12-19T16:41:58	306.92
-33	916	53	5666674210	57.998173	61.774881852563475	true	Random	J	WJAXA	2023-11-27	2023-12-05T19:58:58	976.13
+33	916	53	5666674210	57.99817	61.77488185256347	true	Random	J	WJAXA	2023-11-27	2023-12-05T19:58:58	976.13
 34	145	44	14060350663	73.02436	68.40544929600975	true	Random	S	UUJFP	2023-11-23	2023-12-12T06:08:58	739.45
-34	585	43	1429300527	61.706585	80.88100239373303	false	Random	O	JKJOH	2023-12-17	2023-12-07T11:00:58	468.11
-35	297	75	2468378214	51.353462	34.18114780065386	false	Random	C	HBYZO	2023-12-05	2023-12-09T21:42:58	534.70
+34	585	43	1429300527	61.70658	80.88100239373303	false	Random	O	JKJOH	2023-12-17	2023-12-07T11:00:58	468.11
+35	297	75	2468378214	51.35346	34.18114780065386	false	Random	C	HBYZO	2023-12-05	2023-12-09T21:42:58	534.70
 37	438	39	6809169396	83.56728	40.90894521029911	true	Random	W	GXPAY	2023-12-07	2023-12-18T06:35:58	383.18
 38	606	57	14585148556	82.67463	79.18300302689997	false	Random	E	RSFUZ	2023-12-16	2023-11-27T18:55:58	970.25
-39	726	50	3865644066	26.225628	28.534393094364418	false	Random	F	NIUCS	2023-12-05	2023-12-04T19:31:58	1953.82
+39	726	50	3865644066	26.22563	28.53439309436442	false	Random	F	NIUCS	2023-12-05	2023-12-04T19:31:58	1953.82
 4	122	24	10738473173	81.15482	60.21481394154484	false	Random	Y	PQJRK	2023-12-20	2023-12-09T02:38:58	1467.35
 4	400	20	4000000000	25.5	35.25	false	Fourth	D	Delta	2023-10-09	2023-10-09T17:30	456.78
-4	569	72	10560903405	50.255936	47.535145739285184	false	Random	O	NRIRC	2023-12-05	2023-12-01T09:10:58	1986.99
+4	569	72	10560903405	50.25594	47.53514573928518	false	Random	O	NRIRC	2023-12-05	2023-12-01T09:10:58	1986.99
 4	682	22	2040832636	60.33469	67.33499498711046	true	Random	W	QUICJ	2023-11-24	2023-12-14T10:17:58	579.56
-40	230	34	10824964541	16.929768	53.812277279703366	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
+40	230	34	10824964541	16.92977	53.81227727970337	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
 40	693	69	13276482882	44.35974	82.57845708670757	true	Random	B	RCCSU	2023-11-29	2023-12-01T20:11:58	183.64
-40	914	7	4902128502	19.442041	33.099787387344406	true	Random	Q	KOCWA	2023-11-28	2023-12-21T09:20:58	1824.80
-41	344	34	14536795918	56.660946	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
+40	914	7	4902128502	19.44204	33.09978738734441	true	Random	Q	KOCWA	2023-11-28	2023-12-21T09:20:58	1824.80
+41	344	34	14536795918	56.66095	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
 41	599	54	8095449906	22.58196	37.99742597458578	false	Random	T	GTQXP	2023-12-12	2023-12-22T19:08:58	743.46
-41	697	21	1200243566	12.466168	68.57243624557165	true	Random	U	JZGEG	2023-12-03	2023-12-10T04:51:58	1323.88
+41	697	21	1200243566	12.46617	68.57243624557165	true	Random	U	JZGEG	2023-12-03	2023-12-10T04:51:58	1323.88
 41	708	64	11745827370	72.84812	35.31028363777645	true	Random	O	WGSQC	2023-12-02	2023-11-25T17:07:58	1666.71
-41	840	65	8988241658	37.428593	42.25992474748068	false	Random	E	HURYX	2023-12-22	2023-12-19T01:55:58	141.89
+41	840	65	8988241658	37.42859	42.25992474748068	false	Random	E	HURYX	2023-12-22	2023-12-19T01:55:58	141.89
 42	143	42	3421815721	65.27691	87.91368867538209	true	Random	S	AXGVL	2023-12-06	2023-11-29T07:36:58	575.01
 42	178	38	7559404453	69.69449	64.37154501388798	true	Random	G	QUMUN	2023-12-14	2023-12-17T01:37:58	1190.44
-42	192	28	14454791024	35.465202	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
+42	192	28	14454791024	35.4652	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
 42	355	72	11536856285	74.42886	53.49032479461299	false	Random	I	IQZEI	2023-12-10	2023-12-06T07:17:58	1098.14
-43	178	64	6969956763	40.980415	52.998828731408516	true	Random	C	XQHYB	2023-12-11	2023-12-07T23:00:58	257.08
+43	178	64	6969956763	40.98042	52.99882873140852	true	Random	C	XQHYB	2023-12-11	2023-12-07T23:00:58	257.08
 43	828	24	12011396947	45.07647	54.2136449479346	true	Random	E	HIDUO	2023-12-02	2023-12-19T01:14:58	233.10
 44	219	38	8596488294	73.52956	94.10797854680568	true	Random	E	HMWBI	2023-12-15	2023-12-06T00:51:58	1907.47
-44	694	55	3626514138	62.504086	72.89799265418553	true	Random	Z	JTDVF	2023-12-01	2023-11-29T12:08:58	1769.92
-44	912	63	8534761366	55.993538	50.235171557550416	false	Random	N	OVQRQ	2023-12-08	2023-11-24T03:39:58	264.92
-44	928	7	1939079012	14.426672	68.86451571230457	false	Random	I	EKVWY	2023-12-15	2023-12-09T10:43:58	846.74
-45	455	25	12639246000	47.011307	26.310712594958694	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
-45	492	43	3870916386	51.069588	42.652270406300794	true	Random	H	JVZTB	2023-12-04	2023-12-09T21:06:58	1517.83
-47	508	48	1456473942	48.488297	20.377955902326608	false	Random	B	CAOEY	2023-11-29	2023-12-10T14:49:58	1865.52
-47	566	50	1426586688	51.278687	40.47151456873397	true	Random	F	YBOSH	2023-11-26	2023-12-15T03:44:58	1806.35
+44	694	55	3626514138	62.50409	72.89799265418553	true	Random	Z	JTDVF	2023-12-01	2023-11-29T12:08:58	1769.92
+44	912	63	8534761366	55.99354	50.23517155755042	false	Random	N	OVQRQ	2023-12-08	2023-11-24T03:39:58	264.92
+44	928	7	1939079012	14.42667	68.86451571230457	false	Random	I	EKVWY	2023-12-15	2023-12-09T10:43:58	846.74
+45	455	25	12639246000	47.01131	26.31071259495869	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
+45	492	43	3870916386	51.06959	42.65227040630079	true	Random	H	JVZTB	2023-12-04	2023-12-09T21:06:58	1517.83
+47	508	48	1456473942	48.4883	20.37795590232661	false	Random	B	CAOEY	2023-11-29	2023-12-10T14:49:58	1865.52
+47	566	50	1426586688	51.27869	40.47151456873397	true	Random	F	YBOSH	2023-11-26	2023-12-15T03:44:58	1806.35
 47	838	73	14910230294	83.69784	82.28901816600579	true	Random	L	SHXYL	2023-11-24	2023-12-05T22:19:58	1062.15
 48	898	59	12871187130	10.13838	70.19705104611333	true	Random	J	WFXNN	2023-12-23	2023-12-17T02:53:58	1050.21
-49	165	38	4482178563	34.706547	69.17129468406594	false	Random	W	CPZNY	2023-12-15	2023-11-23T19:56:58	512.60
-49	412	16	8300982793	56.263252	66.07893608061771	false	Random	K	DWWJI	2023-12-08	2023-12-17T11:32:58	1718.54
+49	165	38	4482178563	34.70655	69.17129468406594	false	Random	W	CPZNY	2023-12-15	2023-11-23T19:56:58	512.60
+49	412	16	8300982793	56.26325	66.07893608061771	false	Random	K	DWWJI	2023-12-08	2023-12-17T11:32:58	1718.54
 49	511	51	8602055259	88.1686	88.98712207285577	false	Random	M	ZDKEY	2023-12-10	2023-11-25T02:44:58	241.08
-49	568	70	2916596630	79.16303	56.114316916863025	false	Random	T	ILLIU	2023-11-23	2023-12-07T11:05:58	1039.03
+49	568	70	2916596630	79.16303	56.11431691686303	false	Random	T	ILLIU	2023-11-23	2023-12-07T11:05:58	1039.03
 5	500	25	5000000000	30.75	40.5	true	Fifth	E	Epsilon	2023-10-10	2023-10-10T18:45	567.89
-5	768	5	4152322228	41.128906	78.60686390712706	false	Random	J	LXKRA	2023-12-05	2023-11-24T18:13:58	1941.98
-5	823	63	13328808917	77.768196	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
-5	887	74	4082758600	22.797577	93.28246034891224	false	Random	V	MPPGX	2023-12-01	2023-11-29T01:53:58	510.50
+5	768	5	4152322228	41.12891	78.60686390712706	false	Random	J	LXKRA	2023-12-05	2023-11-24T18:13:58	1941.98
+5	823	63	13328808917	77.7682	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
+5	887	74	4082758600	22.79758	93.28246034891224	false	Random	V	MPPGX	2023-12-01	2023-11-29T01:53:58	510.50
 50	126	58	4433111715	75.31828	43.28056186824247	false	Random	H	UTDJF	2023-12-19	2023-12-10T08:24:58	368.42
 51	778	59	13914307584	27.48499	91.47665081887983	true	Random	X	FGFHK	2023-12-01	2023-12-10T03:24:58	402.63
-51	898	32	13510411411	18.679659	21.406761033351007	false	Random	L	FECUW	2023-12-10	2023-12-14T02:00:58	700.43
-52	811	31	14085958816	51.067017	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
+51	898	32	13510411411	18.67966	21.40676103335101	false	Random	L	FECUW	2023-12-10	2023-12-14T02:00:58	700.43
+52	811	31	14085958816	51.06702	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
 53	505	52	9862728376	58.40501	57.60544454281924	false	Random	V	WYCTZ	2023-11-24	2023-12-20T05:13:58	210.43
 53	667	49	10531976747	50.22229	49.64660893042742	false	Random	K	WNRJE	2023-12-04	2023-12-19T14:57:58	680.97
-53	713	14	1464447148	23.474258	45.35056918414047	false	Random	Q	UHMLT	2023-12-10	2023-11-30T02:07:58	286.70
+53	713	14	1464447148	23.47426	45.35056918414047	false	Random	Q	UHMLT	2023-12-10	2023-11-30T02:07:58	286.70
 53	715	29	10917905565	41.83069	93.50885201221966	true	Random	U	TRLSY	2023-12-03	2023-11-26T15:13:58	369.72
-54	467	42	13684826428	38.491455	90.10566649802195	true	Random	M	ERFBG	2023-11-24	2023-12-02T16:23:58	211.00
-54	827	55	7054839267	58.555687	25.891004802115663	false	Random	O	ASMLW	2023-12-13	2023-12-20T16:41:58	1369.32
-54	843	34	9547939940	38.66475	36.370944299232434	true	Random	P	NTVIR	2023-12-12	2023-12-02T06:45:58	1628.37
+54	467	42	13684826428	38.49146	90.10566649802195	true	Random	M	ERFBG	2023-11-24	2023-12-02T16:23:58	211.00
+54	827	55	7054839267	58.55569	25.89100480211566	false	Random	O	ASMLW	2023-12-13	2023-12-20T16:41:58	1369.32
+54	843	34	9547939940	38.66475	36.37094429923243	true	Random	P	NTVIR	2023-12-12	2023-12-02T06:45:58	1628.37
 55	908	24	13623721787	40.06427	90.85281792731746	false	Random	B	KFZGI	2023-11-27	2023-12-23T18:06:58	1124.95
-55	964	8	14038541765	70.24135	20.034551391620194	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
-57	936	26	12164628867	56.541275	56.276679149397076	true	Random	O	IPHPZ	2023-12-13	2023-11-30T22:36:58	603.68
-59	144	31	6208909394	67.417076	40.59765633709834	true	Random	D	FLWNA	2023-12-12	2023-12-19T06:17:58	1870.24
+55	964	8	14038541765	70.24135	20.03455139162019	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
+57	936	26	12164628867	56.54128	56.27667914939708	true	Random	O	IPHPZ	2023-12-13	2023-11-30T22:36:58	603.68
+59	144	31	6208909394	67.41708	40.59765633709834	true	Random	D	FLWNA	2023-12-12	2023-12-19T06:17:58	1870.24
 59	509	50	5501336408	39.94401	73.35770882761237	true	Random	I	PVZNO	2023-12-04	2023-11-27T04:40:58	1177.33
 6	600	30	6000000000	35.25	45.75	false	Sixth	F	Zeta	2023-10-11	2023-10-11T19:15	678.90
-60	711	69	1493870104	22.574188	61.30347648465907	false	Random	E	FHKVR	2023-11-27	2023-12-05T11:26:58	1981.61
-60	875	42	14283877167	48.811504	67.0706975606688	true	Random	P	VJOZH	2023-12-06	2023-12-15T05:20:58	781.71
-61	267	61	11407448558	12.877184	42.144845857251944	true	Random	B	NRWNW	2023-11-30	2023-11-25T09:34:58	859.85
-61	414	63	14506877706	12.540966	58.04557426323987	false	Random	H	NUOAD	2023-12-10	2023-12-06T22:52:58	780.50
-62	451	50	12304139502	51.151623	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
-62	793	46	7308804595	39.766644	48.88672198076526	true	Random	V	TPENZ	2023-11-26	2023-12-23T17:51:58	388.46
+60	711	69	1493870104	22.57419	61.30347648465907	false	Random	E	FHKVR	2023-11-27	2023-12-05T11:26:58	1981.61
+60	875	42	14283877167	48.8115	67.0706975606688	true	Random	P	VJOZH	2023-12-06	2023-12-15T05:20:58	781.71
+61	267	61	11407448558	12.87718	42.14484585725194	true	Random	B	NRWNW	2023-11-30	2023-11-25T09:34:58	859.85
+61	414	63	14506877706	12.54097	58.04557426323987	false	Random	H	NUOAD	2023-12-10	2023-12-06T22:52:58	780.50
+62	451	50	12304139502	51.15162	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
+62	793	46	7308804595	39.76664	48.88672198076526	true	Random	V	TPENZ	2023-11-26	2023-12-23T17:51:58	388.46
 63	112	75	12197306353	85.90137	43.48931389222043	false	Random	C	KKAIT	2023-11-27	2023-12-23T04:23:58	1954.90
-63	383	35	5161212745	39.455276	52.33267523851794	false	Random	X	TMYMC	2023-11-29	2023-12-10T09:09:58	1442.54
-63	410	33	1767102777	72.260124	56.971483381024896	false	Random	B	QXNSM	2023-12-12	2023-12-19T22:57:58	1660.73
-64	479	20	1710421528	53.324104	33.55443503561635	false	Random	Q	ONZRK	2023-12-09	2023-12-01T22:29:58	252.13
+63	383	35	5161212745	39.45528	52.33267523851794	false	Random	X	TMYMC	2023-11-29	2023-12-10T09:09:58	1442.54
+63	410	33	1767102777	72.26012	56.9714833810249	false	Random	B	QXNSM	2023-12-12	2023-12-19T22:57:58	1660.73
+64	479	20	1710421528	53.3241	33.55443503561635	false	Random	Q	ONZRK	2023-12-09	2023-12-01T22:29:58	252.13
 64	678	14	13681447851	74.83621	36.94143092647816	true	Random	J	KELFB	2023-12-01	2023-12-07T18:14:58	308.26
-64	719	36	1224510454	64.237434	86.05689694804887	true	Random	E	ZVQPU	2023-11-30	2023-12-03T04:56:58	1879.25
-64	822	26	1154241961	52.165447	26.779469377773403	true	Random	E	YWNAD	2023-12-08	2023-12-19T19:08:58	731.15
-65	571	24	10523050555	45.865078	70.80680527390149	true	Random	Y	DILBW	2023-12-17	2023-11-25T22:41:58	859.30
-66	306	5	14448160602	44.642223	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
-66	521	30	7757576974	69.440155	92.3562810104632	false	Random	H	SSOCR	2023-12-19	2023-11-30T06:51:58	913.34
-67	484	65	10817432713	62.168163	77.02869166077757	true	Random	K	SAJMG	2023-12-19	2023-12-14T19:47:58	488.01
-68	266	31	8183454755	69.19586	23.139304803938643	false	Random	S	STCBM	2023-11-26	2023-12-22T13:42:58	1722.37
-68	554	33	3525526216	29.078024	29.6567390059356	false	Random	Y	EUGOF	2023-11-23	2023-12-15T10:33:58	395.41
-68	591	60	4813122821	33.210274	54.464145718507616	false	Random	X	EXROI	2023-12-07	2023-12-07T00:39:58	290.11
+64	719	36	1224510454	64.23743	86.05689694804887	true	Random	E	ZVQPU	2023-11-30	2023-12-03T04:56:58	1879.25
+64	822	26	1154241961	52.16545	26.7794693777734	true	Random	E	YWNAD	2023-12-08	2023-12-19T19:08:58	731.15
+65	571	24	10523050555	45.86508	70.80680527390149	true	Random	Y	DILBW	2023-12-17	2023-11-25T22:41:58	859.30
+66	306	5	14448160602	44.64222	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
+66	521	30	7757576974	69.44016	92.35628101046321	false	Random	H	SSOCR	2023-12-19	2023-11-30T06:51:58	913.34
+67	484	65	10817432713	62.16816	77.02869166077757	true	Random	K	SAJMG	2023-12-19	2023-12-14T19:47:58	488.01
+68	266	31	8183454755	69.19586	23.13930480393864	false	Random	S	STCBM	2023-11-26	2023-12-22T13:42:58	1722.37
+68	554	33	3525526216	29.07802	29.6567390059356	false	Random	Y	EUGOF	2023-11-23	2023-12-15T10:33:58	395.41
+68	591	60	4813122821	33.21027	54.46414571850762	false	Random	X	EXROI	2023-12-07	2023-12-07T00:39:58	290.11
 68	756	63	5416393421	66.41538	76.32820339134415	false	Random	Y	CUNAL	2023-12-23	2023-12-14T22:49:58	1109.25
-68	922	13	11664232196	72.683266	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
-68	947	60	7257499958	45.661217	77.42577781358565	false	Random	F	ENQGA	2023-11-24	2023-11-29T07:33:58	319.99
-69	416	14	7702410607	31.638903	89.5793904314531	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
+68	922	13	11664232196	72.68327	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
+68	947	60	7257499958	45.66122	77.42577781358565	false	Random	F	ENQGA	2023-11-24	2023-11-29T07:33:58	319.99
+69	416	14	7702410607	31.6389	89.57939043145311	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
 7	340	50	8934567449	83.79683	35.39446967734915	false	Random	L	CWYFN	2023-12-05	2023-12-23T02:26:58	806.15
-7	700	35	7000000000	40.5	50.0	true	Seventh	G	Eta	2023-10-12	2023-10-12T20:30	789.01
+7	700	35	7000000000	40.5	50	true	Seventh	G	Eta	2023-10-12	2023-10-12T20:30	789.01
 7	969	62	3451343234	57.17074	56.74513811095188	false	Random	G	OWDSC	2023-12-19	2023-12-11T17:17:58	1874.22
-70	231	67	4547989149	35.103123	51.93622592177748	true	Random	V	ZBCVY	2023-11-29	2023-12-22T11:41:58	1749.60
-70	421	23	3153379289	27.412096	79.32006404438445	false	Random	L	VLJWK	2023-12-04	2023-12-12T05:31:58	1163.35
+70	231	67	4547989149	35.10312	51.93622592177748	true	Random	V	ZBCVY	2023-11-29	2023-12-22T11:41:58	1749.60
+70	421	23	3153379289	27.4121	79.32006404438445	false	Random	L	VLJWK	2023-12-04	2023-12-12T05:31:58	1163.35
 70	751	56	7828222634	52.8313	55.7263634552559	true	Random	B	TFHMH	2023-11-30	2023-12-24T12:22:58	1166.13
-71	452	25	4464808420	18.155642	61.988641984596185	false	Random	K	YXFVY	2023-12-15	2023-12-08T04:58:58	514.74
-71	594	26	1024634104	62.92234	37.216752731371386	true	Random	J	SPUWU	2023-12-04	2023-12-23T08:50:58	779.97
-72	377	11	3042707243	55.289066	53.72552524152444	true	Random	Q	BAPHV	2023-12-06	2023-11-30T07:14:58	119.39
-73	866	49	4618070115	46.803646	91.41305051885227	true	Random	H	ROYYF	2023-12-07	2023-12-01T10:28:58	1817.67
-74	670	60	4783926122	23.513939	91.24357097091087	true	Random	Y	YFPMC	2023-12-23	2023-12-22T22:29:58	943.62
-75	368	73	6944888766	31.500992	56.88267149430107	false	Random	H	LEXKZ	2023-12-21	2023-12-14T01:12:58	443.91
+71	452	25	4464808420	18.15564	61.98864198459619	false	Random	K	YXFVY	2023-12-15	2023-12-08T04:58:58	514.74
+71	594	26	1024634104	62.92234	37.21675273137139	true	Random	J	SPUWU	2023-12-04	2023-12-23T08:50:58	779.97
+72	377	11	3042707243	55.28907	53.72552524152444	true	Random	Q	BAPHV	2023-12-06	2023-11-30T07:14:58	119.39
+73	866	49	4618070115	46.80365	91.41305051885227	true	Random	H	ROYYF	2023-12-07	2023-12-01T10:28:58	1817.67
+74	670	60	4783926122	23.51394	91.24357097091087	true	Random	Y	YFPMC	2023-12-23	2023-12-22T22:29:58	943.62
+75	368	73	6944888766	31.50099	56.88267149430107	false	Random	H	LEXKZ	2023-12-21	2023-12-14T01:12:58	443.91
 76	410	20	10425110604	66.26356	92.68329033006493	false	Random	L	JHFYD	2023-11-23	2023-11-29T10:34:58	867.56
-76	504	70	14161652666	58.071503	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
+76	504	70	14161652666	58.0715	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
 77	131	19	2964167114	33.23181	53.35246738882714	false	Random	G	AHGFO	2023-12-19	2023-12-01T10:11:58	1837.90
-77	165	36	12887722637	19.729382	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
-79	314	17	6823498005	22.562634	72.70049796639023	true	Random	K	FPSNZ	2023-12-07	2023-12-15T11:52:58	211.50
+77	165	36	12887722637	19.72938	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
+79	314	17	6823498005	22.56263	72.70049796639023	true	Random	K	FPSNZ	2023-12-07	2023-12-15T11:52:58	211.50
 8	550	48	13655992126	52.90345	51.35114230137935	false	Random	X	JTVSE	2023-12-13	2023-12-15T03:49:58	361.55
 8	800	40	8000000000	45.75	55.25	false	Eighth	H	Theta	2023-10-13	2023-10-13T21:45	890.12
 8	866	37	13672147880	81.28999	67.66548594336737	false	Random	H	QDJIM	2023-12-14	2023-12-17T18:44:58	1112.05
-80	267	57	8797946135	35.604717	80.51381110359165	false	Random	K	KQTEX	2023-12-09	2023-12-13T06:19:58	1769.15
-80	815	19	14529289205	19.769405	37.37008094684765	true	Random	Z	WLALH	2023-12-11	2023-12-14T03:24:58	479.38
+80	267	57	8797946135	35.60472	80.51381110359165	false	Random	K	KQTEX	2023-12-09	2023-12-13T06:19:58	1769.15
+80	815	19	14529289205	19.76941	37.37008094684765	true	Random	Z	WLALH	2023-12-11	2023-12-14T03:24:58	479.38
 81	726	66	9327218218	81.50363	39.9702863173827	true	Random	X	WODRP	2023-11-28	2023-12-23T13:25:58	561.98
 82	107	51	1358006007	78.36581	46.09413324325159	true	Random	C	IPNQU	2023-12-01	2023-12-14T05:41:58	417.17
 82	133	60	4616538638	88.8813	30.82745983013354	true	Random	W	KPIJE	2023-12-20	2023-12-01T07:57:58	583.41
-82	531	44	10642962933	26.818586	23.851865471979615	false	Random	F	NMQOD	2023-12-13	2023-12-18T19:34:58	861.78
+82	531	44	10642962933	26.81859	23.85186547197961	false	Random	F	NMQOD	2023-12-13	2023-12-18T19:34:58	861.78
 82	603	60	9083469993	81.24088	44.46228092092543	true	Random	Y	WTQGU	2023-11-30	2023-11-28T13:18:58	1448.45
 82	982	62	8955063933	81.2855	78.30439669511465	true	Random	J	SOCOT	2023-12-02	2023-12-02T21:17:58	814.60
-83	700	46	4569093424	50.063602	47.75811273142146	false	Random	R	TEGAY	2023-12-19	2023-12-07T06:46:58	760.22
-84	427	60	9035762847	81.971306	28.37315065501099	true	Random	L	FETYF	2023-12-01	2023-11-24T15:00:58	1267.12
+83	700	46	4569093424	50.0636	47.75811273142146	false	Random	R	TEGAY	2023-12-19	2023-12-07T06:46:58	760.22
+84	427	60	9035762847	81.97131	28.37315065501099	true	Random	L	FETYF	2023-12-01	2023-11-24T15:00:58	1267.12
 85	375	63	6797318130	85.47522	58.16330728665678	true	Random	E	UNZLS	2023-12-01	2023-12-04T05:17:58	1949.48
-85	845	42	2373712244	74.551315	79.15491248184088	false	Random	B	QJRKO	2023-11-29	2023-12-04T09:20:58	317.17
-85	873	18	7233488476	33.83051	31.655950581225508	false	Random	N	RJTIB	2023-11-23	2023-12-11T15:07:58	1249.52
-86	398	27	13222936963	20.387327	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
+85	845	42	2373712244	74.55132	79.15491248184088	false	Random	B	QJRKO	2023-11-29	2023-12-04T09:20:58	317.17
+85	873	18	7233488476	33.83051	31.65595058122551	false	Random	N	RJTIB	2023-11-23	2023-12-11T15:07:58	1249.52
+86	398	27	13222936963	20.38733	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
 86	662	53	8875065706	28.64778	30.6775849729486	false	Random	N	YNQAY	2023-12-15	2023-11-24T21:56:58	1108.35
-86	728	18	13390353484	61.060482	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
-86	998	74	11080891106	82.568756	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
+86	728	18	13390353484	61.06048	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
+86	998	74	11080891106	82.56876	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
 87	145	64	9022533179	37.80205	63.26081178595084	true	Random	T	PEOPK	2023-12-08	2023-12-07T17:41:58	1167.05
-87	641	64	4786767059	14.765089	70.8793353664754	false	Random	W	SQHGN	2023-12-12	2023-12-24T01:19:58	1316.61
-88	274	41	14108849690	73.74919	42.625751442467404	true	Random	X	BVRFA	2023-12-01	2023-11-25T14:32:58	515.18
-88	728	59	8439434199	30.372904	59.410283344764366	false	Random	F	JODWY	2023-12-04	2023-12-01T07:57:58	1753.88
+87	641	64	4786767059	14.76509	70.8793353664754	false	Random	W	SQHGN	2023-12-12	2023-12-24T01:19:58	1316.61
+88	274	41	14108849690	73.74919	42.6257514424674	true	Random	X	BVRFA	2023-12-01	2023-11-25T14:32:58	515.18
+88	728	59	8439434199	30.3729	59.41028334476437	false	Random	F	JODWY	2023-12-04	2023-12-01T07:57:58	1753.88
 88	765	69	9753682777	83.42646	25.99260711248508	true	Random	M	MEJAX	2023-11-25	2023-12-20T09:21:58	1647.22
-89	129	64	6400162051	67.910965	80.48074661432221	true	Random	Y	ZXJWQ	2023-12-16	2023-12-19T10:23:58	1882.65
-89	377	22	14340881803	32.61157	82.5503801214006	false	Random	K	ACYZU	2023-12-01	2023-11-27T02:05:58	672.13
-89	964	41	12706120446	69.484116	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
-9	113	7	6162580854	11.346889	46.82839094332704	false	Random	A	SJTAF	2023-12-14	2023-11-23T18:27:58	1610.49
+89	129	64	6400162051	67.91096	80.48074661432221	true	Random	Y	ZXJWQ	2023-12-16	2023-12-19T10:23:58	1882.65
+89	377	22	14340881803	32.61157	82.55038012140059	false	Random	K	ACYZU	2023-12-01	2023-11-27T02:05:58	672.13
+89	964	41	12706120446	69.48412	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
+9	113	7	6162580854	11.34689	46.82839094332704	false	Random	A	SJTAF	2023-12-14	2023-11-23T18:27:58	1610.49
 9	268	59	8149280252	86.66627	70.91298799618343	false	Random	E	PVKYK	2023-12-21	2023-11-25T00:28:58	263.17
 9	900	45	9000000000	50.0	60.5	true	Ninth	I	Iota	2023-10-14	2023-10-14T22:15	901.23
 9	907	24	6113036809	66.06377	50.26485838775805	true	Random	X	XLPOL	2023-11-23	2023-12-02T09:03:58	256.61
 90	391	26	12874761259	21.49042	53.46850617467312	true	Random	Q	QTJPE	2023-12-17	2023-12-03T17:40:58	748.05
-91	389	11	14784237986	11.174142	27.692284427565397	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
+91	389	11	14784237986	11.17414	27.6922844275654	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
 91	528	68	14588592231	77.4651	88.92064181463138	false	Random	U	JXZUA	2023-12-16	2023-12-21T02:28:58	1834.07
-92	344	29	5182139341	31.653255	44.26814517218887	true	Random	F	NGHOS	2023-12-06	2023-12-09T21:25:58	1291.06
-93	887	20	13555948969	70.57364	32.621532934876804	false	Random	D	SPMEK	2023-11-26	2023-12-20T18:11:58	258.86
-94	216	49	8773264156	81.617195	43.03983700523827	true	Random	D	VHWYT	2023-12-13	2023-11-30T07:03:58	1178.27
-94	693	60	4818659234	26.04229	83.2975107272106	true	Random	B	ENSQO	2023-12-22	2023-12-12T06:08:58	1283.81
-95	560	62	1389447643	19.202044	85.46518830161321	true	Random	S	LQRRB	2023-12-16	2023-12-12T06:12:58	445.65
-96	595	72	11506136303	21.917727	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
-96	637	39	5516035994	55.90832	60.522041012562816	true	Random	O	YPETL	2023-12-02	2023-11-28T02:47:58	1175.16
-97	415	74	10346322649	21.667427	46.58901867647463	false	Random	R	KWFOF	2023-12-21	2023-11-27T12:18:58	1157.72
+92	344	29	5182139341	31.65326	44.26814517218887	true	Random	F	NGHOS	2023-12-06	2023-12-09T21:25:58	1291.06
+93	887	20	13555948969	70.57364	32.6215329348768	false	Random	D	SPMEK	2023-11-26	2023-12-20T18:11:58	258.86
+94	216	49	8773264156	81.6172	43.03983700523827	true	Random	D	VHWYT	2023-12-13	2023-11-30T07:03:58	1178.27
+94	693	60	4818659234	26.04229	83.29751072721059	true	Random	B	ENSQO	2023-12-22	2023-12-12T06:08:58	1283.81
+95	560	62	1389447643	19.20204	85.46518830161321	true	Random	S	LQRRB	2023-12-16	2023-12-12T06:12:58	445.65
+96	595	72	11506136303	21.91773	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
+96	637	39	5516035994	55.90832	60.52204101256282	true	Random	O	YPETL	2023-12-02	2023-11-28T02:47:58	1175.16
+97	415	74	10346322649	21.66743	46.58901867647463	false	Random	R	KWFOF	2023-12-21	2023-11-27T12:18:58	1157.72
 97	839	60	14818779777	46.17389	68.98285340004992	false	Random	W	HMFPU	2023-12-01	2023-12-04T08:41:58	1683.48
-98	228	65	4782017237	55.10206	31.414570993700565	true	Random	P	EOIFT	2023-12-07	2023-12-15T08:12:58	137.49
-99	632	39	8911195323	74.581276	78.2764804276292	false	Random	Q	WTQCL	2023-12-02	2023-12-05T09:18:58	200.21
+98	228	65	4782017237	55.10206	31.41457099370056	true	Random	P	EOIFT	2023-12-07	2023-12-15T08:12:58	137.49
+99	632	39	8911195323	74.58128	78.2764804276292	false	Random	Q	WTQCL	2023-12-02	2023-12-05T09:18:58	200.21
 
 -- !q44 --
-17	289	49	13560709243	39.952793	38.245306832599425	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
-17	499	46	11230409207	51.632103	28.811164197154774	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
+17	289	49	13560709243	39.95279	38.24530683259943	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
+17	499	46	11230409207	51.6321	28.81116419715477	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
 17	646	62	11234805830	76.40492	67.46425239009778	true	Random	N	REHZC	2023-12-09	2023-11-28T02:06:58	365.15
 17	698	55	1807368797	20.17171	43.84496606184709	true	Random	P	SHSJV	2023-12-01	2023-11-25T11:56:58	810.95
 17	794	14	8377523030	28.07663	52.3837762020057	false	Random	E	WPMIN	2023-12-03	2023-11-26T04:59:58	239.42
 17	913	32	4647929554	78.91502	70.54487265463735	true	Random	S	WFPNS	2023-11-27	2023-11-26T03:29:58	321.45
 
 -- !q45 --
-11	1100	55	11000000000	60.5	70.0	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
-11	487	27	14556302216	85.33334	62.596750833474495	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
+11	1100	55	11000000000	60.5	70	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
+11	487	27	14556302216	85.33334	62.59675083347449	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
 12	1200	60	12000000000	65.75	75.25	false	Twelfth	L	Mu	2023-10-17	2023-10-17T02:15	1234.56
-12	751	8	12205294947	23.468674	64.35048302450815	true	Random	K	FCSBV	2023-12-03	2023-12-17T01:10:58	325.26
+12	751	8	12205294947	23.46867	64.35048302450815	true	Random	K	FCSBV	2023-12-03	2023-12-17T01:10:58	325.26
 13	1300	65	13000000000	70.0	80.5	true	Thirteenth	M	Nu	2023-10-18	2023-10-18T03:30	1345.67
-13	335	39	13869202091	30.426075	39.02304533093442	true	Random	L	AULCC	2023-12-08	2023-12-13T00:26:58	387.97
+13	335	39	13869202091	30.42607	39.02304533093442	true	Random	L	AULCC	2023-12-08	2023-12-13T00:26:58	387.97
 13	402	30	10851194313	74.82481	74.90108005771035	false	Random	F	GEMMK	2023-11-27	2023-12-21T15:03:58	1643.55
-13	745	48	13047949175	51.168613	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
+13	745	48	13047949175	51.16861	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
 14	1400	70	14000000000	75.25	85.75	false	Fourteenth	N	Xi	2023-10-19	2023-10-19T04:45	1456.78
-14	968	16	11314514196	62.509666	33.1841427251225	false	Random	T	WDEVJ	2023-11-24	2023-12-06T17:54:58	431.61
-15	1500	75	15000000000	80.5	90.0	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
+14	968	16	11314514196	62.50967	33.1841427251225	false	Random	T	WDEVJ	2023-11-24	2023-12-06T17:54:58	431.61
+15	1500	75	15000000000	80.5	90	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
 16	615	20	12294128025	77.37379	20.42772029677839	true	Random	U	JHPOB	2023-11-30	2023-12-16T14:29:58	1105.33
-17	289	49	13560709243	39.952793	38.245306832599425	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
-17	499	46	11230409207	51.632103	28.811164197154774	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
+17	289	49	13560709243	39.95279	38.24530683259943	true	Random	Q	QEYVY	2023-12-19	2023-12-07T00:35:58	500.19
+17	499	46	11230409207	51.6321	28.81116419715477	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
 17	646	62	11234805830	76.40492	67.46425239009778	true	Random	N	REHZC	2023-12-09	2023-11-28T02:06:58	365.15
-18	835	17	14265814864	18.923101	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
-22	200	41	12163439252	64.621254	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
-28	655	21	14580233860	12.503378	48.60220286874443	false	Random	P	DUBQQ	2023-12-12	2023-12-03T20:11:58	922.42
-30	830	65	12624057029	38.791172	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
-31	990	5	13678786851	15.762894	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
+18	835	17	14265814864	18.9231	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
+22	200	41	12163439252	64.62125	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
+28	655	21	14580233860	12.50338	48.60220286874443	false	Random	P	DUBQQ	2023-12-12	2023-12-03T20:11:58	922.42
+30	830	65	12624057029	38.79117	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
+31	990	5	13678786851	15.76289	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
 33	198	20	13225406950	67.7327	58.63863378877107	true	Random	I	ZKXRA	2023-12-07	2023-11-29T02:33:58	566.76
 33	321	39	12537851805	38.26871	32.6626492245712	true	Random	S	OICCE	2023-12-02	2023-12-19T16:41:58	306.92
 34	145	44	14060350663	73.02436	68.40544929600975	true	Random	S	UUJFP	2023-11-23	2023-12-12T06:08:58	739.45
 38	606	57	14585148556	82.67463	79.18300302689997	false	Random	E	RSFUZ	2023-12-16	2023-11-27T18:55:58	970.25
 4	122	24	10738473173	81.15482	60.21481394154484	false	Random	Y	PQJRK	2023-12-20	2023-12-09T02:38:58	1467.35
-40	230	34	10824964541	16.929768	53.812277279703366	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
+40	230	34	10824964541	16.92977	53.81227727970337	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
 40	693	69	13276482882	44.35974	82.57845708670757	true	Random	B	RCCSU	2023-11-29	2023-12-01T20:11:58	183.64
-41	344	34	14536795918	56.660946	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
+41	344	34	14536795918	56.66095	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
 41	708	64	11745827370	72.84812	35.31028363777645	true	Random	O	WGSQC	2023-12-02	2023-11-25T17:07:58	1666.71
-42	192	28	14454791024	35.465202	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
+42	192	28	14454791024	35.4652	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
 42	355	72	11536856285	74.42886	53.49032479461299	false	Random	I	IQZEI	2023-12-10	2023-12-06T07:17:58	1098.14
 43	828	24	12011396947	45.07647	54.2136449479346	true	Random	E	HIDUO	2023-12-02	2023-12-19T01:14:58	233.10
-45	455	25	12639246000	47.011307	26.310712594958694	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
+45	455	25	12639246000	47.01131	26.31071259495869	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
 47	838	73	14910230294	83.69784	82.28901816600579	true	Random	L	SHXYL	2023-11-24	2023-12-05T22:19:58	1062.15
 48	898	59	12871187130	10.13838	70.19705104611333	true	Random	J	WFXNN	2023-12-23	2023-12-17T02:53:58	1050.21
-5	823	63	13328808917	77.768196	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
+5	823	63	13328808917	77.7682	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
 51	778	59	13914307584	27.48499	91.47665081887983	true	Random	X	FGFHK	2023-12-01	2023-12-10T03:24:58	402.63
-51	898	32	13510411411	18.679659	21.406761033351007	false	Random	L	FECUW	2023-12-10	2023-12-14T02:00:58	700.43
-52	811	31	14085958816	51.067017	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
+51	898	32	13510411411	18.67966	21.40676103335101	false	Random	L	FECUW	2023-12-10	2023-12-14T02:00:58	700.43
+52	811	31	14085958816	51.06702	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
 53	715	29	10917905565	41.83069	93.50885201221966	true	Random	U	TRLSY	2023-12-03	2023-11-26T15:13:58	369.72
-54	467	42	13684826428	38.491455	90.10566649802195	true	Random	M	ERFBG	2023-11-24	2023-12-02T16:23:58	211.00
+54	467	42	13684826428	38.49146	90.10566649802195	true	Random	M	ERFBG	2023-11-24	2023-12-02T16:23:58	211.00
 55	908	24	13623721787	40.06427	90.85281792731746	false	Random	B	KFZGI	2023-11-27	2023-12-23T18:06:58	1124.95
-55	964	8	14038541765	70.24135	20.034551391620194	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
-57	936	26	12164628867	56.541275	56.276679149397076	true	Random	O	IPHPZ	2023-12-13	2023-11-30T22:36:58	603.68
-60	875	42	14283877167	48.811504	67.0706975606688	true	Random	P	VJOZH	2023-12-06	2023-12-15T05:20:58	781.71
-61	267	61	11407448558	12.877184	42.144845857251944	true	Random	B	NRWNW	2023-11-30	2023-11-25T09:34:58	859.85
-61	414	63	14506877706	12.540966	58.04557426323987	false	Random	H	NUOAD	2023-12-10	2023-12-06T22:52:58	780.50
-62	451	50	12304139502	51.151623	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
+55	964	8	14038541765	70.24135	20.03455139162019	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
+57	936	26	12164628867	56.54128	56.27667914939708	true	Random	O	IPHPZ	2023-12-13	2023-11-30T22:36:58	603.68
+60	875	42	14283877167	48.8115	67.0706975606688	true	Random	P	VJOZH	2023-12-06	2023-12-15T05:20:58	781.71
+61	267	61	11407448558	12.87718	42.14484585725194	true	Random	B	NRWNW	2023-11-30	2023-11-25T09:34:58	859.85
+61	414	63	14506877706	12.54097	58.04557426323987	false	Random	H	NUOAD	2023-12-10	2023-12-06T22:52:58	780.50
+62	451	50	12304139502	51.15162	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
 63	112	75	12197306353	85.90137	43.48931389222043	false	Random	C	KKAIT	2023-11-27	2023-12-23T04:23:58	1954.90
 64	678	14	13681447851	74.83621	36.94143092647816	true	Random	J	KELFB	2023-12-01	2023-12-07T18:14:58	308.26
-66	306	5	14448160602	44.642223	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
-67	484	65	10817432713	62.168163	77.02869166077757	true	Random	K	SAJMG	2023-12-19	2023-12-14T19:47:58	488.01
-68	922	13	11664232196	72.683266	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
-76	504	70	14161652666	58.071503	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
-77	165	36	12887722637	19.729382	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
+66	306	5	14448160602	44.64222	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
+67	484	65	10817432713	62.16816	77.02869166077757	true	Random	K	SAJMG	2023-12-19	2023-12-14T19:47:58	488.01
+68	922	13	11664232196	72.68327	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
+76	504	70	14161652666	58.0715	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
+77	165	36	12887722637	19.72938	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
 8	550	48	13655992126	52.90345	51.35114230137935	false	Random	X	JTVSE	2023-12-13	2023-12-15T03:49:58	361.55
 8	866	37	13672147880	81.28999	67.66548594336737	false	Random	H	QDJIM	2023-12-14	2023-12-17T18:44:58	1112.05
-80	815	19	14529289205	19.769405	37.37008094684765	true	Random	Z	WLALH	2023-12-11	2023-12-14T03:24:58	479.38
-86	398	27	13222936963	20.387327	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
-86	728	18	13390353484	61.060482	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
-86	998	74	11080891106	82.568756	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
-88	274	41	14108849690	73.74919	42.625751442467404	true	Random	X	BVRFA	2023-12-01	2023-11-25T14:32:58	515.18
-89	377	22	14340881803	32.61157	82.5503801214006	false	Random	K	ACYZU	2023-12-01	2023-11-27T02:05:58	672.13
-89	964	41	12706120446	69.484116	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
+80	815	19	14529289205	19.76941	37.37008094684765	true	Random	Z	WLALH	2023-12-11	2023-12-14T03:24:58	479.38
+86	398	27	13222936963	20.38733	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
+86	728	18	13390353484	61.06048	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
+86	998	74	11080891106	82.56876	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
+88	274	41	14108849690	73.74919	42.6257514424674	true	Random	X	BVRFA	2023-12-01	2023-11-25T14:32:58	515.18
+89	377	22	14340881803	32.61157	82.55038012140059	false	Random	K	ACYZU	2023-12-01	2023-11-27T02:05:58	672.13
+89	964	41	12706120446	69.48412	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
 90	391	26	12874761259	21.49042	53.46850617467312	true	Random	Q	QTJPE	2023-12-17	2023-12-03T17:40:58	748.05
-91	389	11	14784237986	11.174142	27.692284427565397	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
+91	389	11	14784237986	11.17414	27.6922844275654	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
 91	528	68	14588592231	77.4651	88.92064181463138	false	Random	U	JXZUA	2023-12-16	2023-12-21T02:28:58	1834.07
-93	887	20	13555948969	70.57364	32.621532934876804	false	Random	D	SPMEK	2023-11-26	2023-12-20T18:11:58	258.86
-96	595	72	11506136303	21.917727	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
+93	887	20	13555948969	70.57364	32.6215329348768	false	Random	D	SPMEK	2023-11-26	2023-12-20T18:11:58	258.86
+96	595	72	11506136303	21.91773	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
 97	839	60	14818779777	46.17389	68.98285340004992	false	Random	W	HMFPU	2023-12-01	2023-12-04T08:41:58	1683.48
 
 -- !q46 --
-1	578	55	2111222273	56.858597	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
-29	910	52	5544039917	22.179396	46.32732226806482	true	Random	C	TIZAG	2023-11-28	2023-12-14T16:08:58	900.96
-3	300	15	3000000000	20.25	30.0	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
-43	178	64	6969956763	40.980415	52.998828731408516	true	Random	C	XQHYB	2023-12-11	2023-12-07T23:00:58	257.08
-69	416	14	7702410607	31.638903	89.5793904314531	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
+1	578	55	2111222273	56.8586	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
+29	910	52	5544039917	22.1794	46.32732226806482	true	Random	C	TIZAG	2023-11-28	2023-12-14T16:08:58	900.96
+3	300	15	3000000000	20.25	30	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
+43	178	64	6969956763	40.98042	52.99882873140852	true	Random	C	XQHYB	2023-12-11	2023-12-07T23:00:58	257.08
+69	416	14	7702410607	31.6389	89.57939043145311	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
 82	107	51	1358006007	78.36581	46.09413324325159	true	Random	C	IPNQU	2023-12-01	2023-12-14T05:41:58	417.17
 
 -- !q47 --
-1	578	55	2111222273	56.858597	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
-1	979	44	10163954251	28.827957	57.56879940298416	true	Random	Q	DNRGE	2023-12-09	2023-12-10T20:21:58	1581.25
+1	578	55	2111222273	56.8586	82.38111658179561	true	Random	C	LYDUG	2023-12-17	2023-12-05T13:04:58	1393.11
+1	979	44	10163954251	28.82796	57.56879940298416	true	Random	Q	DNRGE	2023-12-09	2023-12-10T20:21:58	1581.25
 10	1000	50	10000000000	55.25	65.75	false	Tenth	J	Kappa	2023-10-15	2023-10-15T23:30	1012.34
-10	966	38	2203748112	45.555325	27.908447208440094	true	Random	W	LFAGO	2023-12-14	2023-11-26T20:00:58	1898.68
-11	1100	55	11000000000	60.5	70.0	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
-11	441	19	7370044350	74.261696	62.013817404758086	true	Random	D	UYKZA	2023-12-23	2023-12-15T11:49:58	1805.14
-11	487	27	14556302216	85.33334	62.596750833474495	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
+10	966	38	2203748112	45.55532	27.90844720844009	true	Random	W	LFAGO	2023-12-14	2023-11-26T20:00:58	1898.68
+11	1100	55	11000000000	60.5	70	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
+11	441	19	7370044350	74.2617	62.01381740475809	true	Random	D	UYKZA	2023-12-23	2023-12-15T11:49:58	1805.14
+11	487	27	14556302216	85.33334	62.59675083347449	true	Random	E	QMHJD	2023-12-23	2023-12-24T08:30:58	1491.22
 12	1200	60	12000000000	65.75	75.25	false	Twelfth	L	Mu	2023-10-17	2023-10-17T02:15	1234.56
 13	1300	65	13000000000	70.0	80.5	true	Thirteenth	M	Nu	2023-10-18	2023-10-18T03:30	1345.67
 13	402	30	10851194313	74.82481	74.90108005771035	false	Random	F	GEMMK	2023-11-27	2023-12-21T15:03:58	1643.55
-13	696	74	3370487489	84.544014	88.69976219408227	true	Random	H	RTFJI	2023-11-23	2023-11-25T07:32:58	1761.50
-13	745	48	13047949175	51.168613	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
-13	859	65	7433576046	56.136265	34.87823331022725	false	Random	L	CRFUF	2023-12-23	2023-12-12T15:05:58	1037.15
+13	696	74	3370487489	84.54401	88.69976219408227	true	Random	H	RTFJI	2023-11-23	2023-11-25T07:32:58	1761.50
+13	745	48	13047949175	51.16861	85.21972389262197	true	Random	A	AYBWQ	2023-12-22	2023-12-22T16:25:58	1192.48
+13	859	65	7433576046	56.13626	34.87823331022725	false	Random	L	CRFUF	2023-12-23	2023-12-12T15:05:58	1037.15
 14	1400	70	14000000000	75.25	85.75	false	Fourteenth	N	Xi	2023-10-19	2023-10-19T04:45	1456.78
-14	195	17	2370700139	16.777058	64.81793301410002	false	Random	P	IIGRE	2023-12-12	2023-12-14T22:40:58	1678.44
-14	966	65	7828602539	62.430664	68.85873133439297	true	Random	I	VVOQH	2023-12-01	2023-12-06T00:54:58	1300.43
-15	1500	75	15000000000	80.5	90.0	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
-16	135	22	7901304568	43.944805	85.16901944253635	true	Random	K	NUQEP	2023-11-29	2023-11-25T23:42:58	1440.74
+14	195	17	2370700139	16.77706	64.81793301410002	false	Random	P	IIGRE	2023-12-12	2023-12-14T22:40:58	1678.44
+14	966	65	7828602539	62.43066	68.85873133439297	true	Random	I	VVOQH	2023-12-01	2023-12-06T00:54:58	1300.43
+15	1500	75	15000000000	80.5	90	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
+16	135	22	7901304568	43.94481	85.16901944253635	true	Random	K	NUQEP	2023-11-29	2023-11-25T23:42:58	1440.74
 16	615	20	12294128025	77.37379	20.42772029677839	true	Random	U	JHPOB	2023-11-30	2023-12-16T14:29:58	1105.33
-17	499	46	11230409207	51.632103	28.811164197154774	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
-18	690	17	1399456103	63.261967	42.964715823771236	true	Random	R	BWSRS	2023-12-13	2023-12-23T08:33:58	1840.02
-18	835	17	14265814864	18.923101	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
-19	917	66	2340946367	89.035675	22.649362455875274	false	Random	D	HWHMU	2023-11-30	2023-12-10T02:36:58	1960.07
-19	993	13	7039833438	79.769066	69.79049291517285	true	Random	X	OFSUV	2023-12-11	2023-12-08T01:46:58	1958.95
+17	499	46	11230409207	51.6321	28.81116419715477	false	Random	V	BVLUH	2023-12-13	2023-12-23T17:59:58	1387.62
+18	690	17	1399456103	63.26197	42.96471582377124	true	Random	R	BWSRS	2023-12-13	2023-12-23T08:33:58	1840.02
+18	835	17	14265814864	18.9231	80.53531451138412	true	Random	V	PIKUZ	2023-12-20	2023-12-21T07:39:58	1167.09
+19	917	66	2340946367	89.03568	22.64936245587527	false	Random	D	HWHMU	2023-11-30	2023-12-10T02:36:58	1960.07
+19	993	13	7039833438	79.76907	69.79049291517285	true	Random	X	OFSUV	2023-12-11	2023-12-08T01:46:58	1958.95
 2	921	62	8557914543	78.52379	58.6849882881372	false	Random	D	KBXXS	2023-12-07	2023-12-02T22:24:58	1782.88
-2	925	46	6013180177	41.107002	34.86561026061906	true	Random	L	XLLXY	2023-12-06	2023-12-09T14:04:58	1246.26
-20	248	64	7704906572	35.089928	76.69128821479936	true	Random	T	KQOMS	2023-11-30	2023-12-11T01:35:58	1799.26
-22	200	41	12163439252	64.621254	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
-23	192	8	5102667616	54.111057	40.85713971600841	false	Random	J	EBXEB	2023-12-13	2023-12-10T11:32:58	1824.12
+2	925	46	6013180177	41.107	34.86561026061906	true	Random	L	XLLXY	2023-12-06	2023-12-09T14:04:58	1246.26
+20	248	64	7704906572	35.08993	76.69128821479936	true	Random	T	KQOMS	2023-11-30	2023-12-11T01:35:58	1799.26
+22	200	41	12163439252	64.62125	81.68574929661384	true	Random	U	KGVNU	2023-12-20	2023-11-30T14:56:58	1915.47
+23	192	8	5102667616	54.11106	40.85713971600841	false	Random	J	EBXEB	2023-12-13	2023-12-10T11:32:58	1824.12
 27	866	24	5531365994	72.77447	86.96690821165853	false	Random	S	TZPFJ	2023-11-28	2023-12-13T15:31:58	1274.75
-29	157	34	2302882987	51.924015	20.311140937696468	true	Random	R	MBOXJ	2023-12-02	2023-12-03T14:12:58	1620.80
-29	923	57	1591814253	68.57371	33.342802789892986	true	Random	Q	ZONGC	2023-12-20	2023-12-13T09:11:58	1465.38
-3	259	74	7422478791	22.291426	75.38227773520089	true	Random	S	VWAXJ	2023-12-01	2023-12-05T21:23:58	1970.57
-30	292	71	10308444223	63.039078	76.40649540444898	false	Random	G	DRLHY	2023-12-19	2023-12-14T15:32:58	1165.14
-30	830	65	12624057029	38.791172	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
+29	157	34	2302882987	51.92402	20.31114093769647	true	Random	R	MBOXJ	2023-12-02	2023-12-03T14:12:58	1620.80
+29	923	57	1591814253	68.57371	33.34280278989299	true	Random	Q	ZONGC	2023-12-20	2023-12-13T09:11:58	1465.38
+3	259	74	7422478791	22.29143	75.38227773520089	true	Random	S	VWAXJ	2023-12-01	2023-12-05T21:23:58	1970.57
+30	292	71	10308444223	63.03908	76.40649540444898	false	Random	G	DRLHY	2023-12-19	2023-12-14T15:32:58	1165.14
+30	830	65	12624057029	38.79117	59.72899174862661	false	Random	A	LFPWP	2023-12-03	2023-12-17T00:10:58	1760.62
 31	395	22	6141426904	88.37914	52.0655270963123	false	Random	J	DRPJV	2023-12-07	2023-11-29T03:15:58	1076.41
-31	990	5	13678786851	15.762894	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
-39	726	50	3865644066	26.225628	28.534393094364418	false	Random	F	NIUCS	2023-12-05	2023-12-04T19:31:58	1953.82
+31	990	5	13678786851	15.76289	85.24173385692956	false	Random	H	THGIM	2023-12-14	2023-12-09T01:24:58	1834.37
+39	726	50	3865644066	26.22563	28.53439309436442	false	Random	F	NIUCS	2023-12-05	2023-12-04T19:31:58	1953.82
 4	122	24	10738473173	81.15482	60.21481394154484	false	Random	Y	PQJRK	2023-12-20	2023-12-09T02:38:58	1467.35
-4	569	72	10560903405	50.255936	47.535145739285184	false	Random	O	NRIRC	2023-12-05	2023-12-01T09:10:58	1986.99
-40	230	34	10824964541	16.929768	53.812277279703366	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
-40	914	7	4902128502	19.442041	33.099787387344406	true	Random	Q	KOCWA	2023-11-28	2023-12-21T09:20:58	1824.80
-41	344	34	14536795918	56.660946	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
-41	697	21	1200243566	12.466168	68.57243624557165	true	Random	U	JZGEG	2023-12-03	2023-12-10T04:51:58	1323.88
+4	569	72	10560903405	50.25594	47.53514573928518	false	Random	O	NRIRC	2023-12-05	2023-12-01T09:10:58	1986.99
+40	230	34	10824964541	16.92977	53.81227727970337	false	Random	F	YDQHF	2023-12-14	2023-12-03T17:42:58	1623.79
+40	914	7	4902128502	19.44204	33.09978738734441	true	Random	Q	KOCWA	2023-11-28	2023-12-21T09:20:58	1824.80
+41	344	34	14536795918	56.66095	84.15108995619764	false	Random	Q	KYLCH	2023-12-10	2023-12-04T08:25:58	1902.09
+41	697	21	1200243566	12.46617	68.57243624557165	true	Random	U	JZGEG	2023-12-03	2023-12-10T04:51:58	1323.88
 41	708	64	11745827370	72.84812	35.31028363777645	true	Random	O	WGSQC	2023-12-02	2023-11-25T17:07:58	1666.71
 42	178	38	7559404453	69.69449	64.37154501388798	true	Random	G	QUMUN	2023-12-14	2023-12-17T01:37:58	1190.44
-42	192	28	14454791024	35.465202	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
+42	192	28	14454791024	35.4652	46.34876515635648	false	Random	W	NQFGR	2023-12-04	2023-11-24T05:02:58	1428.02
 42	355	72	11536856285	74.42886	53.49032479461299	false	Random	I	IQZEI	2023-12-10	2023-12-06T07:17:58	1098.14
 44	219	38	8596488294	73.52956	94.10797854680568	true	Random	E	HMWBI	2023-12-15	2023-12-06T00:51:58	1907.47
-44	694	55	3626514138	62.504086	72.89799265418553	true	Random	Z	JTDVF	2023-12-01	2023-11-29T12:08:58	1769.92
-45	455	25	12639246000	47.011307	26.310712594958694	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
-45	492	43	3870916386	51.069588	42.652270406300794	true	Random	H	JVZTB	2023-12-04	2023-12-09T21:06:58	1517.83
-47	508	48	1456473942	48.488297	20.377955902326608	false	Random	B	CAOEY	2023-11-29	2023-12-10T14:49:58	1865.52
-47	566	50	1426586688	51.278687	40.47151456873397	true	Random	F	YBOSH	2023-11-26	2023-12-15T03:44:58	1806.35
+44	694	55	3626514138	62.50409	72.89799265418553	true	Random	Z	JTDVF	2023-12-01	2023-11-29T12:08:58	1769.92
+45	455	25	12639246000	47.01131	26.31071259495869	false	Random	Z	GGEUA	2023-11-27	2023-12-01T20:41:58	1698.21
+45	492	43	3870916386	51.06959	42.65227040630079	true	Random	H	JVZTB	2023-12-04	2023-12-09T21:06:58	1517.83
+47	508	48	1456473942	48.4883	20.37795590232661	false	Random	B	CAOEY	2023-11-29	2023-12-10T14:49:58	1865.52
+47	566	50	1426586688	51.27869	40.47151456873397	true	Random	F	YBOSH	2023-11-26	2023-12-15T03:44:58	1806.35
 47	838	73	14910230294	83.69784	82.28901816600579	true	Random	L	SHXYL	2023-11-24	2023-12-05T22:19:58	1062.15
 48	898	59	12871187130	10.13838	70.19705104611333	true	Random	J	WFXNN	2023-12-23	2023-12-17T02:53:58	1050.21
-49	412	16	8300982793	56.263252	66.07893608061771	false	Random	K	DWWJI	2023-12-08	2023-12-17T11:32:58	1718.54
-49	568	70	2916596630	79.16303	56.114316916863025	false	Random	T	ILLIU	2023-11-23	2023-12-07T11:05:58	1039.03
-5	768	5	4152322228	41.128906	78.60686390712706	false	Random	J	LXKRA	2023-12-05	2023-11-24T18:13:58	1941.98
-5	823	63	13328808917	77.768196	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
-52	811	31	14085958816	51.067017	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
-54	827	55	7054839267	58.555687	25.891004802115663	false	Random	O	ASMLW	2023-12-13	2023-12-20T16:41:58	1369.32
-54	843	34	9547939940	38.66475	36.370944299232434	true	Random	P	NTVIR	2023-12-12	2023-12-02T06:45:58	1628.37
+49	412	16	8300982793	56.26325	66.07893608061771	false	Random	K	DWWJI	2023-12-08	2023-12-17T11:32:58	1718.54
+49	568	70	2916596630	79.16303	56.11431691686303	false	Random	T	ILLIU	2023-11-23	2023-12-07T11:05:58	1039.03
+5	768	5	4152322228	41.12891	78.60686390712706	false	Random	J	LXKRA	2023-12-05	2023-11-24T18:13:58	1941.98
+5	823	63	13328808917	77.7682	22.87975226738422	false	Random	F	OIYPV	2023-12-11	2023-12-14T06:43:58	1144.38
+52	811	31	14085958816	51.06702	65.01991893789116	true	Random	A	CODYQ	2023-12-03	2023-12-07T23:25:58	1797.21
+54	827	55	7054839267	58.55569	25.89100480211566	false	Random	O	ASMLW	2023-12-13	2023-12-20T16:41:58	1369.32
+54	843	34	9547939940	38.66475	36.37094429923243	true	Random	P	NTVIR	2023-12-12	2023-12-02T06:45:58	1628.37
 55	908	24	13623721787	40.06427	90.85281792731746	false	Random	B	KFZGI	2023-11-27	2023-12-23T18:06:58	1124.95
-55	964	8	14038541765	70.24135	20.034551391620194	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
-59	144	31	6208909394	67.417076	40.59765633709834	true	Random	D	FLWNA	2023-12-12	2023-12-19T06:17:58	1870.24
+55	964	8	14038541765	70.24135	20.03455139162019	false	Random	J	AYXIT	2023-12-13	2023-12-16T19:38:58	1476.73
+59	144	31	6208909394	67.41708	40.59765633709834	true	Random	D	FLWNA	2023-12-12	2023-12-19T06:17:58	1870.24
 59	509	50	5501336408	39.94401	73.35770882761237	true	Random	I	PVZNO	2023-12-04	2023-11-27T04:40:58	1177.33
-60	711	69	1493870104	22.574188	61.30347648465907	false	Random	E	FHKVR	2023-11-27	2023-12-05T11:26:58	1981.61
-62	451	50	12304139502	51.151623	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
+60	711	69	1493870104	22.57419	61.30347648465907	false	Random	E	FHKVR	2023-11-27	2023-12-05T11:26:58	1981.61
+62	451	50	12304139502	51.15162	22.46754141558852	false	Random	C	SRRSV	2023-12-08	2023-12-20T02:48:58	1352.65
 63	112	75	12197306353	85.90137	43.48931389222043	false	Random	C	KKAIT	2023-11-27	2023-12-23T04:23:58	1954.90
-63	383	35	5161212745	39.455276	52.33267523851794	false	Random	X	TMYMC	2023-11-29	2023-12-10T09:09:58	1442.54
-63	410	33	1767102777	72.260124	56.971483381024896	false	Random	B	QXNSM	2023-12-12	2023-12-19T22:57:58	1660.73
-64	719	36	1224510454	64.237434	86.05689694804887	true	Random	E	ZVQPU	2023-11-30	2023-12-03T04:56:58	1879.25
-66	306	5	14448160602	44.642223	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
-68	266	31	8183454755	69.19586	23.139304803938643	false	Random	S	STCBM	2023-11-26	2023-12-22T13:42:58	1722.37
+63	383	35	5161212745	39.45528	52.33267523851794	false	Random	X	TMYMC	2023-11-29	2023-12-10T09:09:58	1442.54
+63	410	33	1767102777	72.26012	56.9714833810249	false	Random	B	QXNSM	2023-12-12	2023-12-19T22:57:58	1660.73
+64	719	36	1224510454	64.23743	86.05689694804887	true	Random	E	ZVQPU	2023-11-30	2023-12-03T04:56:58	1879.25
+66	306	5	14448160602	44.64222	50.24249889525751	false	Random	X	OASEB	2023-12-11	2023-11-27T00:16:58	1345.69
+68	266	31	8183454755	69.19586	23.13930480393864	false	Random	S	STCBM	2023-11-26	2023-12-22T13:42:58	1722.37
 68	756	63	5416393421	66.41538	76.32820339134415	false	Random	Y	CUNAL	2023-12-23	2023-12-14T22:49:58	1109.25
-68	922	13	11664232196	72.683266	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
-69	416	14	7702410607	31.638903	89.5793904314531	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
+68	922	13	11664232196	72.68327	37.9910331525765	false	Random	W	PPWBB	2023-11-26	2023-12-10T22:54:58	1968.89
+69	416	14	7702410607	31.6389	89.57939043145311	true	Random	C	URQMU	2023-11-25	2023-11-30T15:17:58	1379.22
 7	969	62	3451343234	57.17074	56.74513811095188	false	Random	G	OWDSC	2023-12-19	2023-12-11T17:17:58	1874.22
-70	231	67	4547989149	35.103123	51.93622592177748	true	Random	V	ZBCVY	2023-11-29	2023-12-22T11:41:58	1749.60
-70	421	23	3153379289	27.412096	79.32006404438445	false	Random	L	VLJWK	2023-12-04	2023-12-12T05:31:58	1163.35
+70	231	67	4547989149	35.10312	51.93622592177748	true	Random	V	ZBCVY	2023-11-29	2023-12-22T11:41:58	1749.60
+70	421	23	3153379289	27.4121	79.32006404438445	false	Random	L	VLJWK	2023-12-04	2023-12-12T05:31:58	1163.35
 70	751	56	7828222634	52.8313	55.7263634552559	true	Random	B	TFHMH	2023-11-30	2023-12-24T12:22:58	1166.13
-73	866	49	4618070115	46.803646	91.41305051885227	true	Random	H	ROYYF	2023-12-07	2023-12-01T10:28:58	1817.67
-76	504	70	14161652666	58.071503	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
+73	866	49	4618070115	46.80365	91.41305051885227	true	Random	H	ROYYF	2023-12-07	2023-12-01T10:28:58	1817.67
+76	504	70	14161652666	58.0715	67.99111956708262	true	Random	Y	HAVCK	2023-11-27	2023-12-14T16:08:58	1864.98
 77	131	19	2964167114	33.23181	53.35246738882714	false	Random	G	AHGFO	2023-12-19	2023-12-01T10:11:58	1837.90
-77	165	36	12887722637	19.729382	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
+77	165	36	12887722637	19.72938	45.61157603163882	true	Random	S	OZOLB	2023-12-02	2023-12-03T05:07:58	1576.79
 8	866	37	13672147880	81.28999	67.66548594336737	false	Random	H	QDJIM	2023-12-14	2023-12-17T18:44:58	1112.05
-80	267	57	8797946135	35.604717	80.51381110359165	false	Random	K	KQTEX	2023-12-09	2023-12-13T06:19:58	1769.15
+80	267	57	8797946135	35.60472	80.51381110359165	false	Random	K	KQTEX	2023-12-09	2023-12-13T06:19:58	1769.15
 82	603	60	9083469993	81.24088	44.46228092092543	true	Random	Y	WTQGU	2023-11-30	2023-11-28T13:18:58	1448.45
-84	427	60	9035762847	81.971306	28.37315065501099	true	Random	L	FETYF	2023-12-01	2023-11-24T15:00:58	1267.12
+84	427	60	9035762847	81.97131	28.37315065501099	true	Random	L	FETYF	2023-12-01	2023-11-24T15:00:58	1267.12
 85	375	63	6797318130	85.47522	58.16330728665678	true	Random	E	UNZLS	2023-12-01	2023-12-04T05:17:58	1949.48
-85	873	18	7233488476	33.83051	31.655950581225508	false	Random	N	RJTIB	2023-11-23	2023-12-11T15:07:58	1249.52
-86	398	27	13222936963	20.387327	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
+85	873	18	7233488476	33.83051	31.65595058122551	false	Random	N	RJTIB	2023-11-23	2023-12-11T15:07:58	1249.52
+86	398	27	13222936963	20.38733	44.51255195842424	true	Random	T	ZCRFI	2023-12-21	2023-12-23T12:04:58	1801.53
 86	662	53	8875065706	28.64778	30.6775849729486	false	Random	N	YNQAY	2023-12-15	2023-11-24T21:56:58	1108.35
-86	728	18	13390353484	61.060482	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
-86	998	74	11080891106	82.568756	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
+86	728	18	13390353484	61.06048	87.44751616093882	false	Random	J	BUCVI	2023-12-07	2023-12-14T23:00:58	1611.17
+86	998	74	11080891106	82.56876	32.0122101203062	true	Random	K	VAAMT	2023-12-23	2023-12-01T10:14:58	1708.39
 87	145	64	9022533179	37.80205	63.26081178595084	true	Random	T	PEOPK	2023-12-08	2023-12-07T17:41:58	1167.05
-87	641	64	4786767059	14.765089	70.8793353664754	false	Random	W	SQHGN	2023-12-12	2023-12-24T01:19:58	1316.61
-88	728	59	8439434199	30.372904	59.410283344764366	false	Random	F	JODWY	2023-12-04	2023-12-01T07:57:58	1753.88
+87	641	64	4786767059	14.76509	70.8793353664754	false	Random	W	SQHGN	2023-12-12	2023-12-24T01:19:58	1316.61
+88	728	59	8439434199	30.3729	59.41028334476437	false	Random	F	JODWY	2023-12-04	2023-12-01T07:57:58	1753.88
 88	765	69	9753682777	83.42646	25.99260711248508	true	Random	M	MEJAX	2023-11-25	2023-12-20T09:21:58	1647.22
-89	129	64	6400162051	67.910965	80.48074661432221	true	Random	Y	ZXJWQ	2023-12-16	2023-12-19T10:23:58	1882.65
-89	964	41	12706120446	69.484116	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
-9	113	7	6162580854	11.346889	46.82839094332704	false	Random	A	SJTAF	2023-12-14	2023-11-23T18:27:58	1610.49
-91	389	11	14784237986	11.174142	27.692284427565397	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
+89	129	64	6400162051	67.91096	80.48074661432221	true	Random	Y	ZXJWQ	2023-12-16	2023-12-19T10:23:58	1882.65
+89	964	41	12706120446	69.48412	32.39048200771184	true	Random	J	IIRNY	2023-12-16	2023-11-29T01:54:58	1298.71
+9	113	7	6162580854	11.34689	46.82839094332704	false	Random	A	SJTAF	2023-12-14	2023-11-23T18:27:58	1610.49
+91	389	11	14784237986	11.17414	27.6922844275654	true	Random	P	DYILB	2023-12-14	2023-12-21T11:07:58	1175.73
 91	528	68	14588592231	77.4651	88.92064181463138	false	Random	U	JXZUA	2023-12-16	2023-12-21T02:28:58	1834.07
-92	344	29	5182139341	31.653255	44.26814517218887	true	Random	F	NGHOS	2023-12-06	2023-12-09T21:25:58	1291.06
-94	216	49	8773264156	81.617195	43.03983700523827	true	Random	D	VHWYT	2023-12-13	2023-11-30T07:03:58	1178.27
-94	693	60	4818659234	26.04229	83.2975107272106	true	Random	B	ENSQO	2023-12-22	2023-12-12T06:08:58	1283.81
-96	595	72	11506136303	21.917727	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
-96	637	39	5516035994	55.90832	60.522041012562816	true	Random	O	YPETL	2023-12-02	2023-11-28T02:47:58	1175.16
-97	415	74	10346322649	21.667427	46.58901867647463	false	Random	R	KWFOF	2023-12-21	2023-11-27T12:18:58	1157.72
+92	344	29	5182139341	31.65326	44.26814517218887	true	Random	F	NGHOS	2023-12-06	2023-12-09T21:25:58	1291.06
+94	216	49	8773264156	81.6172	43.03983700523827	true	Random	D	VHWYT	2023-12-13	2023-11-30T07:03:58	1178.27
+94	693	60	4818659234	26.04229	83.29751072721059	true	Random	B	ENSQO	2023-12-22	2023-12-12T06:08:58	1283.81
+96	595	72	11506136303	21.91773	74.74561804277158	true	Random	T	SPLKA	2023-12-02	2023-11-30T00:39:58	1693.61
+96	637	39	5516035994	55.90832	60.52204101256282	true	Random	O	YPETL	2023-12-02	2023-11-28T02:47:58	1175.16
+97	415	74	10346322649	21.66743	46.58901867647463	false	Random	R	KWFOF	2023-12-21	2023-11-27T12:18:58	1157.72
 97	839	60	14818779777	46.17389	68.98285340004992	false	Random	W	HMFPU	2023-12-01	2023-12-04T08:41:58	1683.48
 
 -- !q48 --
 1	100	5	1000000000	10.5	20.75	true	First	A	Alpha	2023-10-06	2023-10-06T14:30	123.45
 10	1000	50	10000000000	55.25	65.75	false	Tenth	J	Kappa	2023-10-15	2023-10-15T23:30	1012.34
-11	1100	55	11000000000	60.5	70.0	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
+11	1100	55	11000000000	60.5	70	true	Eleventh	K	Lambda	2023-10-16	2023-10-16T01:45	1123.45
 12	1200	60	12000000000	65.75	75.25	false	Twelfth	L	Mu	2023-10-17	2023-10-17T02:15	1234.56
 13	1300	65	13000000000	70.0	80.5	true	Thirteenth	M	Nu	2023-10-18	2023-10-18T03:30	1345.67
 14	1400	70	14000000000	75.25	85.75	false	Fourteenth	N	Xi	2023-10-19	2023-10-19T04:45	1456.78
-15	1500	75	15000000000	80.5	90.0	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
+15	1500	75	15000000000	80.5	90	true	Fifteenth	O	Omicron	2023-10-20	2023-10-20T05:15	1567.89
 2	200	10	2000000000	15.75	25.5	false	Second	B	Beta	2023-10-07	2023-10-07T15:45	234.56
-3	300	15	3000000000	20.25	30.0	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
+3	300	15	3000000000	20.25	30	true	Third	C	Gamma	2023-10-08	2023-10-08T16:15	345.67
 4	400	20	4000000000	25.5	35.25	false	Fourth	D	Delta	2023-10-09	2023-10-09T17:30	456.78
 5	500	25	5000000000	30.75	40.5	true	Fifth	E	Epsilon	2023-10-10	2023-10-10T18:45	567.89
 6	600	30	6000000000	35.25	45.75	false	Sixth	F	Zeta	2023-10-11	2023-10-11T19:15	678.90
-7	700	35	7000000000	40.5	50.0	true	Seventh	G	Eta	2023-10-12	2023-10-12T20:30	789.01
+7	700	35	7000000000	40.5	50	true	Seventh	G	Eta	2023-10-12	2023-10-12T20:30	789.01
 8	800	40	8000000000	45.75	55.25	false	Eighth	H	Theta	2023-10-13	2023-10-13T21:45	890.12
 9	900	45	9000000000	50.0	60.5	true	Ninth	I	Iota	2023-10-14	2023-10-14T22:15	901.23
 
@@ -570,4 +524,3 @@
 438	491	21	66065079309	6.6624016E7	1.5542114222539822E10	false	CEbvKZRdvMHxzVOIejq	wJ	eoTkUlht	2023-12-08	2023-12-17T19:49:48	86666.80
 
 -- !lzo_8 --
-
diff --git a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
index fe8243f91e0a95..62fabbe7d08be7 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_get_schema_from_table.out
@@ -650,654 +650,3 @@ true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
 -- !schema_7 --
 \N	\N	\N	\N	\N	\N	\N	\N	\N		test		test	test      	1	2	3	4	5.1	6.2	true	false	2011-05-06	2011-05-06T07:08:09.123	-1.2	12.30	-1234.5678	123456789.12340000	-1234567890.12345678	1234567890123456789012.1234567800000000	dGVzdDI=
 
--- !all_types_bool_col_topn_asc --
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-false	1	1	1	10	1.1	10.1	11	01/02/09	1	2009-01-02T07:11:00.450	2009	1
-false	3	3	3	30	3.3	30.3	13	01/02/09	3	2009-01-02T07:13:00.480	2009	1
-false	5	5	5	50	5.5	50.5	15	01/02/09	5	2009-01-02T07:15:00.550	2009	1
-false	7	7	7	70	7.7	70.7	17	01/02/09	7	2009-01-02T07:17:00.660	2009	1
-false	9	9	9	90	9.9	90.89999999999999	19	01/02/09	9	2009-01-02T07:19:00.810	2009	1
-
--- !all_types_bool_col_topn_desc --
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	8	8	8	80	8.8	80.8	7288	12/30/10	8	2010-12-30T11:58:13.330	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7286	12/30/10	6	2010-12-30T11:56:13.200	2010	12
-true	4	4	4	40	4.4	40.4	7284	12/30/10	4	2010-12-30T11:54:13.110	2010	12
-true	2	2	2	20	2.2	20.2	7282	12/30/10	2	2010-12-30T11:52:13.600	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-
--- !all_types_tinyint_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_tinyint_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_smallint_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_smallint_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_int_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_int_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_bigint_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_bigint_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_float_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_float_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_double_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_double_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_id_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_id_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_date_string_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_date_string_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_string_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_string_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7279	12/29/10	9	2010-12-29T11:49:12.960	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7269	12/28/10	9	2010-12-28T11:39:12.510	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7259	12/27/10	9	2010-12-27T11:29:12.600	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7249	12/26/10	9	2010-12-26T11:19:11.610	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7239	12/25/10	9	2010-12-25T11:09:11.160	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7229	12/24/10	9	2010-12-24T10:59:10.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7219	12/23/10	9	2010-12-23T10:49:10.260	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7209	12/22/10	9	2010-12-22T10:39:09.810	2010	12
-
--- !all_types_timestamp_col_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_timestamp_col_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_year_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_year_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_month_topn_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_month_topn_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_bool_col_topn_abs_asc --
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-false	1	1	1	10	1.1	10.1	11	01/02/09	1	2009-01-02T07:11:00.450	2009	1
-false	3	3	3	30	3.3	30.3	13	01/02/09	3	2009-01-02T07:13:00.480	2009	1
-false	5	5	5	50	5.5	50.5	15	01/02/09	5	2009-01-02T07:15:00.550	2009	1
-false	7	7	7	70	7.7	70.7	17	01/02/09	7	2009-01-02T07:17:00.660	2009	1
-false	9	9	9	90	9.9	90.89999999999999	19	01/02/09	9	2009-01-02T07:19:00.810	2009	1
-
--- !all_types_bool_col_topn_abs_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7289	12/30/10	9	2010-12-30T11:59:13.410	2010	12
-false	7	7	7	70	7.7	70.7	7287	12/30/10	7	2010-12-30T11:57:13.260	2010	12
-false	5	5	5	50	5.5	50.5	7285	12/30/10	5	2010-12-30T11:55:13.150	2010	12
-false	3	3	3	30	3.3	30.3	7283	12/30/10	3	2010-12-30T11:53:13.800	2010	12
-false	1	1	1	10	1.1	10.1	7281	12/30/10	1	2010-12-30T11:51:13.500	2010	12
-
--- !all_types_tinyint_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_tinyint_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_smallint_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_smallint_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_int_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_int_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_bigint_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_bigint_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_float_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_float_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_double_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_double_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_id_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_id_topn_abs_desc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_date_string_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_date_string_col_topn_abs_desc --
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-true	4	4	4	40	4.4	40.4	7294	12/31/10	4	2010-12-31T12:04:13.560	2010	12
-false	3	3	3	30	3.3	30.3	7293	12/31/10	3	2010-12-31T12:03:13.530	2010	12
-true	2	2	2	20	2.2	20.2	7292	12/31/10	2	2010-12-31T12:02:13.510	2010	12
-false	1	1	1	10	1.1	10.1	7291	12/31/10	1	2010-12-31T12:01:13.500	2010	12
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-
--- !all_types_string_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-true	0	0	0	0	0.0	0	10	01/02/09	0	2009-01-02T07:10:00.450	2009	1
-true	0	0	0	0	0.0	0	20	01/03/09	0	2009-01-03T07:20:00.900	2009	1
-true	0	0	0	0	0.0	0	30	01/04/09	0	2009-01-04T07:30:01.350	2009	1
-true	0	0	0	0	0.0	0	40	01/05/09	0	2009-01-05T07:40:01.800	2009	1
-true	0	0	0	0	0.0	0	50	01/06/09	0	2009-01-06T07:50:02.250	2009	1
-true	0	0	0	0	0.0	0	60	01/07/09	0	2009-01-07T08:00:02.700	2009	1
-true	0	0	0	0	0.0	0	70	01/08/09	0	2009-01-08T08:10:03.150	2009	1
-true	0	0	0	0	0.0	0	80	01/09/09	0	2009-01-09T08:20:03.600	2009	1
-true	0	0	0	0	0.0	0	90	01/10/09	0	2009-01-10T08:30:04.500	2009	1
-
--- !all_types_string_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	7290	12/31/10	0	2010-12-31T12:00:13.500	2010	12
-true	0	0	0	0	0.0	0	7280	12/30/10	0	2010-12-30T11:50:13.500	2010	12
-true	0	0	0	0	0.0	0	7270	12/29/10	0	2010-12-29T11:40:12.600	2010	12
-true	0	0	0	0	0.0	0	7260	12/28/10	0	2010-12-28T11:30:12.150	2010	12
-true	0	0	0	0	0.0	0	7250	12/27/10	0	2010-12-27T11:20:11.700	2010	12
-true	0	0	0	0	0.0	0	7240	12/26/10	0	2010-12-26T11:10:11.250	2010	12
-true	0	0	0	0	0.0	0	7230	12/25/10	0	2010-12-25T11:00:10.800	2010	12
-true	0	0	0	0	0.0	0	7220	12/24/10	0	2010-12-24T10:50:10.350	2010	12
-true	0	0	0	0	0.0	0	7210	12/23/10	0	2010-12-23T10:40:09.900	2010	12
-true	0	0	0	0	0.0	0	7200	12/22/10	0	2010-12-22T10:30:09.450	2010	12
-
--- !all_types_timestamp_col_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_timestamp_col_topn_abs_desc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_year_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_year_topn_abs_desc --
-false	9	9	9	90	9.9	90.89999999999999	3649	12/31/09	9	2009-12-31T12:09:13.860	2009	12
-true	8	8	8	80	8.8	80.8	3648	12/31/09	8	2009-12-31T12:08:13.780	2009	12
-false	7	7	7	70	7.7	70.7	3647	12/31/09	7	2009-12-31T12:07:13.710	2009	12
-true	6	6	6	60	6.6	60.59999999999999	3646	12/31/09	6	2009-12-31T12:06:13.650	2009	12
-false	5	5	5	50	5.5	50.5	3645	12/31/09	5	2009-12-31T12:05:13.600	2009	12
-true	4	4	4	40	4.4	40.4	3644	12/31/09	4	2009-12-31T12:04:13.560	2009	12
-false	3	3	3	30	3.3	30.3	3643	12/31/09	3	2009-12-31T12:03:13.530	2009	12
-true	2	2	2	20	2.2	20.2	3642	12/31/09	2	2009-12-31T12:02:13.510	2009	12
-false	1	1	1	10	1.1	10.1	3641	12/31/09	1	2009-12-31T12:01:13.500	2009	12
-true	0	0	0	0	0.0	0	3640	12/31/09	0	2009-12-31T12:00:13.500	2009	12
-
--- !all_types_month_topn_abs_asc --
-true	0	0	0	0	0.0	0	0	01/01/09	0	2009-01-01T07:00	2009	1
-false	1	1	1	10	1.1	10.1	1	01/01/09	1	2009-01-01T07:01	2009	1
-true	2	2	2	20	2.2	20.2	2	01/01/09	2	2009-01-01T07:02:00.100	2009	1
-false	3	3	3	30	3.3	30.3	3	01/01/09	3	2009-01-01T07:03:00.300	2009	1
-true	4	4	4	40	4.4	40.4	4	01/01/09	4	2009-01-01T07:04:00.600	2009	1
-false	5	5	5	50	5.5	50.5	5	01/01/09	5	2009-01-01T07:05:00.100	2009	1
-true	6	6	6	60	6.6	60.59999999999999	6	01/01/09	6	2009-01-01T07:06:00.150	2009	1
-false	7	7	7	70	7.7	70.7	7	01/01/09	7	2009-01-01T07:07:00.210	2009	1
-true	8	8	8	80	8.8	80.8	8	01/01/09	8	2009-01-01T07:08:00.280	2009	1
-false	9	9	9	90	9.9	90.89999999999999	9	01/01/09	9	2009-01-01T07:09:00.360	2009	1
-
--- !all_types_month_topn_abs_desc --
-false	9	9	9	90	9.9	90.89999999999999	3959	01/31/10	9	2010-01-31T12:09:13.860	2010	1
-true	8	8	8	80	8.8	80.8	3958	01/31/10	8	2010-01-31T12:08:13.780	2010	1
-false	7	7	7	70	7.7	70.7	3957	01/31/10	7	2010-01-31T12:07:13.710	2010	1
-true	6	6	6	60	6.6	60.59999999999999	3956	01/31/10	6	2010-01-31T12:06:13.650	2010	1
-false	5	5	5	50	5.5	50.5	3955	01/31/10	5	2010-01-31T12:05:13.600	2010	1
-true	4	4	4	40	4.4	40.4	3954	01/31/10	4	2010-01-31T12:04:13.560	2010	1
-false	3	3	3	30	3.3	30.3	3953	01/31/10	3	2010-01-31T12:03:13.530	2010	1
-true	2	2	2	20	2.2	20.2	3952	01/31/10	2	2010-01-31T12:02:13.510	2010	1
-false	1	1	1	10	1.1	10.1	3951	01/31/10	1	2010-01-31T12:01:13.500	2010	1
-true	0	0	0	0	0.0	0	3950	01/31/10	0	2010-01-31T12:00:13.500	2010	1
-
--- !schema_1 --
-1	638	6	15635	32.00	49620.16	0.07	0.02	N	O	1996-01-30	1996-02-07	1996-02-03	DELIVER IN PERSON	MAIL	arefully slyly ex	cn	beijing
-
--- !schema_2 --
-6374628540732951412	-77	-65	-70	-107	-215	65	0	-526	-1309	3750	8827	-19795	34647	57042	-1662	-138248	-890685	-228568	1633079	-2725524	6163040	-10491702	697237	74565050	127767368	93532213	-209675435	-32116110	-3624917040	-2927805617	15581947241	21893441661	24075494509	-116822110531	-59683724667	-146210393388	114424524398	1341560771667	-1638742564263	520137948334	-2927347587131	7415137351179	-7963937754617	52157548982266	140803519083304	-294675355729619	-868076759504942	181128508165910	-91753231238823	-3511241416682881	-11545256318348796	-1952917510863468	-5161099825338866	-59726090170689781	287170105829528178	607326725526282735	1253194074103207461	-162443950414676064	-2964036188567341159	2602201580810990248	5581917084094110764	111739292249520611	-315687754593838642	-2804420462762366976	-2078683524
-
--- !schema_3 --
-false	5	5	5	50	5.5	50.5	7295	12/31/10	5	2010-12-31T12:05:13.600	2010	12
-false	7	7	7	70	7.7	70.7	7297	12/31/10	7	2010-12-31T12:07:13.710	2010	12
-false	9	9	9	90	9.9	90.89999999999999	7299	12/31/10	9	2010-12-31T12:09:13.860	2010	12
-true	6	6	6	60	6.6	60.59999999999999	7296	12/31/10	6	2010-12-31T12:06:13.650	2010	12
-true	8	8	8	80	8.8	80.8	7298	12/31/10	8	2010-12-31T12:08:13.780	2010	12
-
--- !schema_4 --
-2	24	15314771	999319712124142303	true	6.009337E8	4.817722807977021e+16	\N	northern rural	2022-08-30T23:21:08	407186.2849	phones	int_col	2019-01-01	[2.595433907849411e+17, 5.88165568758352e+17, 4.780259987226574e+17, 6.926622881251557e+17, 9.86405645575228e+17]	\N	phones	int_col
-5	59	317349992	998913039814974432	false	5.6584858E8	9.900861328269033e+17	Handling man satisfy firework descent top. Racing closed county set-up crown cave. Correctly front duration pure.	\N	2022-09-02T19:52:57	372765.2493	desktops	tinyint_col	2021-10-03	[9.983261252571983e+17, 3.612076153030643e+17, 9.969131496509435e+17, 8.991290717923475e+17, 1.195589374709888e+17]	["CrySxz", "FMXGRcaGbahSVqhp", "oRKqPmhM", "VdODasEdDWFSRIQf"]	desktops	tinyint_col
-6	62	915699741	999653836472045196	true	4.51937504E8	8.796150544502191e+17	Tale get speed platform august curved. Ease grass neighbour landlord. Baby genetic youth.	\N	2022-08-07T09:30:56	875620.2176	phones	smallint_col	\N	[9.423540715161855e+17, 4.833249992029562e+17, 9.167007747789834e+17]	["zNfbLeFx", "GNTJOmWJyRmOK", "hwvfhSQGsaaMEqUrWCK", "cQrQsROKLARA", "nONj", "oepXBFB", "IPtUql"]	phones	smallint_col
-
--- !schema_5 --
-00cwjIryUv	EXHwpeK2Nl	hv2PYEMYMM	eo69nyw4Yv	K6797tgjFg	LlFNd8Kyy5	wkpLCO3uo1	AIXCj1MfeD	ni0HxZbiUO	6IjRdM8Gqi	qsTMK6A2eC	1wu7v9OPwW	qavArd9tDc	sU88hZADLj	lyzWlwLOCx	2022-11-25
-
--- !schema_6 --
-""	"test"
-
--- !schema_7 --
-\N	\N	\N	\N	\N	\N	\N	\N	\N		test		test	test      	1	2	3	4	5.1	6.2	true	false	2011-05-06	2011-05-06T07:08:09.123	-1.2	12.30	-1234.5678	123456789.12340000	-1234567890.12345678	1234567890123456789012.1234567800000000	dGVzdDI=
-
diff --git a/regression-test/data/external_table_p0/hive/test_hive_openx_json.out b/regression-test/data/external_table_p0/hive/test_hive_openx_json.out
index 6eadea56694c85..f4fd28e4d05725 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_openx_json.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_openx_json.out
@@ -10,6 +10,7 @@
 \N	\N	\N	\N	\N
 \N	\N	\N	\N	\N
 \N	\N	\N	\N	\N
+\N	\N	\N	\N	\N
 1	Alice	[1, 2, 3]	{"math":90, "english":85}	{"a":100, "b":"test1", "c":1234567890}
 2	Bob	[4, 5]	{"math":80, "science":95}	{"a":200, "b":"test2", "c":9876543210}
 
diff --git a/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out b/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out
index dbea5056998664..1cb5cde15144e4 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_schema_evolution.out
@@ -35,39 +35,3 @@
 \N
 2023-01-01T13:01:03
 
--- !q01 --
-1	kaka	\N
-2	messi	2023-01-01T13:01:03
-
--- !q02 --
-1	kaka	\N
-2	messi	2023-01-01T13:01:03
-
--- !q03 --
-\N
-2023-01-01T13:01:03
-
--- !q01 --
-1	kaka	\N
-2	messi	2023-01-01T21:01:03
-
--- !q02 --
-1	kaka	\N
-2	messi	2023-01-01T21:01:03
-
--- !q03 --
-\N
-2023-01-01T21:01:03
-
--- !q01 --
-1	kaka	\N
-2	messi	2023-01-01T13:01:03
-
--- !q02 --
-1	kaka	\N
-2	messi	2023-01-01T13:01:03
-
--- !q03 --
-\N
-2023-01-01T13:01:03
-
diff --git a/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out b/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out
index 932b62b5034b94..d3df453f105971 100644
--- a/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out
+++ b/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out
@@ -21,232 +21,6 @@ false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-1
 -- !q05 --
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
 
--- !q06 --
-
--- !q01 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-
--- !q05 --
-
--- !q01 --
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-123456.789012	-123456789.012345678901	str	binary_value	2024-03-25	2024-03-25T12:00	2024-03-25T12:00:00.123457	2024-03-25T12:00:00.123457	char_value11111	char_value22222	char_value33333	varchar_value11111	varchar_value22222	varchar_value33333	{"key7":"value1"}	{"key7":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{5.3456:2.3456}	{5.34567890:2.34567890}	{2.34567890:2.34567890}	{7.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[9.4567, 4.5678]	[6.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240321
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q05 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q01 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
-
--- !q01 --
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-123456.789012	-123456789.012345678901	str	binary_value	2024-03-25	2024-03-25T12:00	2024-03-25T12:00:00.123457	2024-03-25T12:00:00.123457	char_value11111	char_value22222	char_value33333	varchar_value11111	varchar_value22222	varchar_value33333	{"key7":"value1"}	{"key7":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{5.3456:2.3456}	{5.34567890:2.34567890}	{2.34567890:2.34567890}	{7.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[9.4567, 4.5678]	[6.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240325
-
--- !q05 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-
--- !q06 --
-
--- !q01 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-
--- !q05 --
-
--- !q01 --
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-123456.789012	-123456789.012345678901	str	binary_value	2024-03-25	2024-03-25T12:00	2024-03-25T12:00:00.123457	2024-03-25T12:00:00.123457	char_value11111	char_value22222	char_value33333	varchar_value11111	varchar_value22222	varchar_value33333	{"key7":"value1"}	{"key7":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{5.3456:2.3456}	{5.34567890:2.34567890}	{2.34567890:2.34567890}	{7.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[9.4567, 4.5678]	[6.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240321
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q05 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q01 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123456	2024-03-20T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{2:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[3.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-\N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240322
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240320
-
--- !q01 --
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q02 --
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q03 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123457	2024-03-21T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
-false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123457	2024-03-22T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
-
--- !q04 --
-false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-123456.789012	-123456789.012345678901	str	binary_value	2024-03-25	2024-03-25T12:00	2024-03-25T12:00:00.123457	2024-03-25T12:00:00.123457	char_value11111	char_value22222	char_value33333	varchar_value11111	varchar_value22222	varchar_value33333	{"key7":"value1"}	{"key7":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{5.3456:2.3456}	{5.34567890:2.34567890}	{2.34567890:2.34567890}	{7.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[9.4567, 4.5678]	[6.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240325
-
--- !q05 --
-\N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
-
--- !q06 --
-
 -- !q01 --
 false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
 false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
@@ -276,8 +50,6 @@ true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5
 \N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
 
--- !q05 --
-
 -- !q01 --
 true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
 
@@ -357,8 +129,6 @@ false	-7	-15	16	-9223372036854775808	-123.45	-123456.789	123456789	-1234.5678	-1
 -- !q05 --
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	20240321
 
--- !q06 --
-
 -- !q01 --
 false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-21	2024-03-21T12:00	2024-03-21T12:00:00.123456	2024-03-21T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{2:20}	{2:200000000000}	{2.2:20.2}	{2.2:20.2}	{0:1}	{2.2:2.2}	{2.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{2.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[3.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240321
 false	-128	-32768	-2147483648	-9223372036854775808	-123.45	-123456.789	-123456789	-1234.5678	-123456.789012	-123456789.012345678901	string_value	binary_value	2024-03-22	2024-03-22T12:00	2024-03-22T12:00:00.123456	2024-03-22T12:00:00.123456	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"x":"y"}	{3:20}	{3:200000000000}	{3.2:20.2}	{3.2:20.2}	{0:1}	{3.2:2.2}	{3.34:2.34}	{2.3456:2.3456}	{2.34567890:2.34567890}	{2.34567890:2.34567890}	{3.3456789012345679:2.3456789012345679}	["string1", "string2"]	[4, 5, 6]	[300000000000, 400000000000]	[3.3, 4.4]	[3.123456789, 4.123456789]	[0, 1]	["varchar1", "varchar2"]	["char1", "char2"]	[3.3, 4.4]	[3.45, 4.56]	[8.4567, 4.5678]	[3.45678901, 4.56789012]	[3.45678901, 4.56789012]	[3.4567890123456789, 4.5678901234567890]	{"s_bigint":-1234567890}	{"key":[{"s_int":-123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":-123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":-123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":-123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value11", "value2", null]	[null, null, null]	20240322
@@ -388,8 +158,6 @@ true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5
 \N	\N	\N	\N	\N	-123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{3:20}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[8.4567, 4.5678]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
 \N	\N	\N	\N	\N	123.45	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	{1:10}	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[1.2345, 2.3456]	\N	\N	\N	\N	\N	\N	\N	\N	\N	\N	[null, "value1", "value2"]	\N	\N	\N	\N
 
--- !q05 --
-
 -- !q01 --
 true	127	32767	2147483647	9223372036854775807	123.45	123456.789	123456789	1234.5678	123456.789012	123456789.012345678901	string_value	binary_value	2024-03-20	2024-03-20T12:00	2024-03-20T12:00:00.123457	2024-03-20T12:00:00.123457	char_value1	char_value2	char_value3	varchar_value1	varchar_value2	varchar_value3	{"key1":"value1"}	{"key1":"value1"}	{"a":"b"}	{1:10}	{1:100000000000}	{1.1:10.1}	{1.1:10.1}	{1:0}	{1.1:1.1}	{1.23:1.23}	{1.2345:1.2345}	{1.23456789:1.23456789}	{1.23456789:1.23456789}	{1.2345678901234568:1.2345678901234568}	["string1", "string2"]	[1, 2, 3]	[100000000000, 200000000000]	[1.1, 2.2]	[1.123456789, 2.123456789]	[1, 0]	["varchar1", "varchar2"]	["char1", "char2"]	[1.1, 2.2]	[1.23, 2.34]	[1.2345, 2.3456]	[1.23456789, 2.34567891]	[1.23456789, 2.34567891]	[1.2345678901234568, 2.3456789012345679]	{"s_bigint":1234567890}	{"key":[{"s_int":123}]}	{"struct_field":["value1", "value2"]}	{"struct_field_null":null, "struct_field_null2":null}	{"struct_non_nulls_after_nulls1":123, "struct_non_nulls_after_nulls2":"value"}	{"struct_field1":123, "struct_field2":"value", "strict_field3":{"nested_struct_field1":123, "nested_struct_field2":"nested_value"}}	{"null_key":null}	[null, "value1", "value2"]	["value1", null, "value2"]	["value1", "value2", null]	[null, null, null]	20240320
 
diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out
index 529e37390f05bc..bfc73649139041 100644
--- a/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out
+++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_export_timestamp_tz.out
@@ -8,62 +8,62 @@ id	int	Yes	true	\N
 ts_tz	timestamptz(6)	Yes	true	\N	WITH_TIMEZONE
 
 -- !select_tvf0 --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf0_desc --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf0_false --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf0_desc_false --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf1 --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf1_desc --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf1_false --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf1_desc_false --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf2 --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf2_desc --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
 -- !select_tvf3 --
-1	2025-01-01 00:00:00+08:00
-2	2025-06-01 12:34:56+08:00
-3	2025-12-31 23:59:59+08:00
+1	2025-01-01 00:00:00.000000+08:00
+2	2025-06-01 12:34:56.789000+08:00
+3	2025-12-31 23:59:59.999999+08:00
 4	\N
 
 -- !select_tvf3_desc --
 id	int	Yes	false	\N	NONE
-ts_tz	timestamptz	Yes	false	\N	NONE
+ts_tz	timestamptz(6)	Yes	false	\N	NONE
 
diff --git a/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out b/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out
index 66207238741815..6a6ebab9001e43 100644
--- a/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out
+++ b/regression-test/data/external_table_p0/paimon/test_paimon_catalog_timestamp_tz.out
@@ -28,11 +28,11 @@ ts_ltz	timestamptz(3)	Yes	true	\N	WITH_TIMEZONE
 3	2024-11-11 11:11:11.123+08:00
 
 -- !mapping_tz --
-1	2024-01-01 10:00:00+08:00
-2	2026-01-06 16:13:12+08:00
-3	2024-11-11 11:11:11+08:00
+1	2024-01-01 10:00:00.000+08:00
+2	2026-01-06 16:13:12.000+08:00
+3	2024-11-11 11:11:11.123+08:00
 
 -- !mapping_tz_desc --
 id	int	Yes	false	\N	NONE
-ts_ltz	timestamptz	Yes	false	\N	NONE
+ts_ltz	timestamptz(3)	Yes	false	\N	NONE
 
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group0.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group0.out
index 0e21a8fad6f690858499234dde89675694b89fa2..015d9391317356db1bf8a9ec641cec332546ff94 100644
GIT binary patch
delta 2492
zcmZuz!EW0|5Jir==(Rw4Tj*9mCPYeh5?=z^VjU(i<;YS2r|n@yt)#7~U1mupQi6aV
z(H|+AWB;Mw)Y&C>NgLt8u&AA#nfKnj(O>^Q`S#zFzkhgIkI2ah9-;8<RWy%7G(u!Y
zx7&<)B&CXNsw_K_4Hq;cYnID`mj(P&va}>cc7*TfmU8T)aDEjfi#Q39kKpV^9#}zg
zCHK5w_GFq$wdP_=N=eEcBZ_USI-`nE_@)wN;gOZh*6{vA#R^D7Gbyk?Lh(FI&;+&B
z=(pQ~uNjdWb5B!<E0T#d6DenfhkakizJpl7)2@M)ltoEsDRWZmLOxVHy?1@u^e^3e
zU1&zJAE4La4U`ffSwT{-0cJ?pAy=#@$d+%Q>XiN|Ge+S27`>j&r>6FPdcc*eG7g!u
zV@>EvR;B5q1H4J7NDh}w(3>c^GtA`Bn#PYE(y9P_8>YynQrr~eI`l|;w5u)W0$^V;
zDhl$6-ZQevs?CPi&~{D%jS~B(=z2DvU0%*W1xjc(+9DfTfFq?z*#rDdy_qba1;eAJ
zF{kN0-9lYZT(2m@X`a!NWvnb-V*d>CybrH0;&;oahFp+|D+vq@NhPYH0{zh9xX(*j
z!eypdSBb*@GjtQpm!LCuXRlHV5s204psB~Ss|A}X30!I3*+*6j(nF>%`hsUnfJ%%u
zvHu)hzS8mzkP(u*97N#Yw>l8AluGadgwI&s%HML@i5(<N>-Lb>aEJXDXg<3R6BxxY
z0@1;6bWmvFbaH9)l6{qxJkK~&0HiJ9LH~kob=*g0x^A$hda${wHSTr*aeyx7;rsY4
zfD1r9p0ggd#t0sum9vxsaZUpdYi)9ZEhx~3*CJ9tObtOe7@^y6IgLp;4=<qe3DREB
z5@5%%^QBgHwpEw1LA%GVRSgb&^lKa~lJFY%(_=?~TAsozY5m)aI%Yr^V%SVK4gxe^
z-rgEQde#KCC-m5WjGPXQ&;$fGmiEBxtjAaxXw~6hT%%<YFQO@V6DL>G*)_0kMzpc0
z&Ji?L8efZsXE}p`R6VoC!NlHpy9gK2t)a5<Mhle2Uw&#ly0&)J*5^6XYvJ#jTPa4<
zT|25Sz2|1u&JiA?%P<M2dKECLT?+id4!FIwwF<9(G#$1^-BoY0K)cFdU259|aWw7)
z<Y{-o6BJ$D-7G*jAX_K&4n1ot5G&UbT+Ljew^~is)fNoc!03AKX{eiS%iJ2DqG<6d
zp52&z2~z1)Z_LU;sUz?i`UTe4WdzJV+^2@h8upxL9dy{hgr6Z;#$W<>5CbD+t<#-z
z;FH6NqQ`Sf!O&i04&dh~PNq;}fWYtU&S<w&yS&rdnW06#bt1j!a|I#=CSW<^7y7bU
z66rw*P|KQ|9OqG8jqc|{yQNIVzGWCm{G5`VOz#<Np^P5E!r*4@nRax$fPbDIeyn};
zdjc*6f5zy`Am2&B2Is&1@f8%%ef%;I)qcg);QZ7*Lo&GMVm&w)@Qdav1e|_1z;}-e
y80_Sp4RpvEs=t@Sh4_zShNLScaF07>O6i{&yT_dw?Bxm;k;#uwzI{IVY4kt+qgbN=

delta 19
bcmdmdj&bsC#tl3Yn>Q%rh;EiJmo@+ZQ7#7e

diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out
index 16b89ac45d63ca..79b63e41cc1b4d 100644
--- a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out
+++ b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group2.out
@@ -24,14 +24,14 @@ apple_banana_mango81
 apple_banana_mango9
 
 -- !test_2 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_3 --
 [{"one":"0 - 0 - 1", "two":"0 - 0 - 2", "three":"0 - 0 - 3"}, {"one":"0 - 1 - 1", "two":"0 - 1 - 2", "three":"0 - 1 - 3"}]
@@ -39,14 +39,14 @@ apple_banana_mango9
 [{"one":"2 - 0 - 1", "two":"2 - 0 - 2", "three":"2 - 0 - 3"}, {"one":"2 - 1 - 1", "two":"2 - 1 - 2", "three":"2 - 1 - 3"}]
 
 -- !test_4 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_5 --
 ["good", "bye"]
@@ -89,17 +89,17 @@ apple_banana_mango9
 1981-01-07T00:00	15.8
 1981-01-08T00:00	17.4
 1981-01-09T00:00	21.8
-1981-01-10T00:00	20.0
+1981-01-10T00:00	20
 
 -- !test_13 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_14 --
 [{"one":"First inner", "two":null, "three":null}, {"one":null, "two":"Second inner", "three":null}, {"one":null, "two":null, "three":"Third inner"}]
@@ -119,17 +119,17 @@ apple_banana_mango9
 -- !test_16 --
 1	Alice	2022-11-16T02:32:09
 2	Bob	2022-11-16T02:32:09
-3	Cecilia	2022-11-16T02:32:09
+3	Cecilia	2022-11-16T02:32:09.123534
 
 -- !test_17 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_18 --
 0.00
@@ -151,14 +151,14 @@ apple_banana_mango9
 2
 
 -- !test_20 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_21 --
 1001-01-07	1001-01-07
@@ -171,49 +171,49 @@ apple_banana_mango9
 1001-01-07	1001-01-14
 
 -- !test_22 --
-1001-01-07T17:07:47.171	1001-01-07T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-08T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-09T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-10T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-11T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-12T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-13T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-14T17:07:47.171
+1001-01-07T17:07:46.123	1001-01-07T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-08T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-09T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-10T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-11T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-12T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-13T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-14T17:07:46.123
 
 -- !test_23 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_24 --
 false	1	2	3	10	1.2	val_1	val_1	HEARTS	false	1	2	3	10	1.2	val_1	val_1	HEARTS	["arr_1", "arr_2", "arr_3"]	[1]	{1:"val_1", 2:"val_2", 3:"val_3"}	{1:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}], 2:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}], 3:[{"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}]}
 false	3	4	5	30	3.2	val_3	val_3	CLUBS	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_3", "arr_4", "arr_5"]	[3]	{3:"val_3", 4:"val_4", 5:"val_5"}	{3:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}], 4:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}], 5:[{"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}]}
 false	5	6	7	50	5.2	val_5	val_5	HEARTS	false	5	6	7	50	5.2	val_5	val_5	HEARTS	["arr_5", "arr_6", "arr_7"]	[5]	{5:"val_5", 6:"val_6", 7:"val_7"}	{5:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}], 6:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}], 7:[{"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}]}
 false	7	8	9	70	7.2	val_7	val_7	CLUBS	false	7	8	9	70	7.2	val_7	val_7	CLUBS	["arr_7", "arr_8", "arr_9"]	[7]	{7:"val_7", 8:"val_8", 9:"val_9"}	{7:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}], 8:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}], 9:[{"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}]}
-false	9	10	11	90	9.2	val_9	val_9	HEARTS	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_9", "arr_10", "arr_11"]	[9]	{9:"val_9", 10:"val_10", 11:"val_11"}	{9:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 10:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 11:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}]}
+false	9	10	11	90	9.199999999999999	val_9	val_9	HEARTS	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_9", "arr_10", "arr_11"]	[9]	{9:"val_9", 10:"val_10", 11:"val_11"}	{9:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 10:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}], 11:[{"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}, {"nestedintscolumn":[11, 12, 13], "nestedstringcolumn":"val_11"}]}
 true	0	1	2	0	0.2	val_0	val_0	SPADES	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_0", "arr_1", "arr_2"]	[0]	{0:"val_0", 1:"val_1", 2:"val_2"}	{0:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}], 1:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}], 2:[{"nestedintscolumn":[0, 1, 2], "nestedstringcolumn":"val_0"}, {"nestedintscolumn":[1, 2, 3], "nestedstringcolumn":"val_1"}, {"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}]}
 true	2	3	4	20	2.2	val_2	val_2	DIAMONDS	true	2	3	4	20	2.2	val_2	val_2	DIAMONDS	["arr_2", "arr_3", "arr_4"]	[2]	{2:"val_2", 3:"val_3", 4:"val_4"}	{2:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}], 3:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}], 4:[{"nestedintscolumn":[2, 3, 4], "nestedstringcolumn":"val_2"}, {"nestedintscolumn":[3, 4, 5], "nestedstringcolumn":"val_3"}, {"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}]}
 true	4	5	6	40	4.2	val_4	val_4	SPADES	true	4	5	6	40	4.2	val_4	val_4	SPADES	["arr_4", "arr_5", "arr_6"]	[4]	{4:"val_4", 5:"val_5", 6:"val_6"}	{4:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}], 5:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}], 6:[{"nestedintscolumn":[4, 5, 6], "nestedstringcolumn":"val_4"}, {"nestedintscolumn":[5, 6, 7], "nestedstringcolumn":"val_5"}, {"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}]}
 true	6	7	8	60	6.2	val_6	val_6	DIAMONDS	\N	\N	\N	\N	\N	\N	\N	\N	\N	["arr_6", "arr_7", "arr_8"]	[6]	{6:"val_6", 7:"val_7", 8:"val_8"}	{6:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}], 7:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}], 8:[{"nestedintscolumn":[6, 7, 8], "nestedstringcolumn":"val_6"}, {"nestedintscolumn":[7, 8, 9], "nestedstringcolumn":"val_7"}, {"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}]}
-true	8	9	10	80	8.2	val_8	val_8	SPADES	true	8	9	10	80	8.2	val_8	val_8	SPADES	["arr_8", "arr_9", "arr_10"]	[8]	{8:"val_8", 9:"val_9", 10:"val_10"}	{8:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 9:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 10:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}]}
+true	8	9	10	80	8.199999999999999	val_8	val_8	SPADES	true	8	9	10	80	8.199999999999999	val_8	val_8	SPADES	["arr_8", "arr_9", "arr_10"]	[8]	{8:"val_8", 9:"val_9", 10:"val_10"}	{8:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 9:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}], 10:[{"nestedintscolumn":[8, 9, 10], "nestedstringcolumn":"val_8"}, {"nestedintscolumn":[9, 10, 11], "nestedstringcolumn":"val_9"}, {"nestedintscolumn":[10, 11, 12], "nestedstringcolumn":"val_10"}]}
 
 -- !test_25 --
 {"duration":"111222333444"}
 
 -- !test_26 --
-1001-01-07T17:07:47.171	1001-01-07T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-08T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-09T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-10T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-11T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-12T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-13T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-14T17:07:47.171
+1001-01-07T17:07:46.123	1001-01-07T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-08T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-09T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-10T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-11T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-12T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-13T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-14T17:07:46.123
 
 -- !test_27 --
 1001-01-07	1001-01-07
@@ -238,14 +238,14 @@ true	8	9	10	80	8.2	val_8	val_8	SPADES	true	8	9	10	80	8.2	val_8	val_8	SPADES	["ar
 9.00
 
 -- !test_29 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_30 --
 \N
@@ -259,23 +259,20 @@ true	8	9	10	80	8.2	val_8	val_8	SPADES	true	8	9	10	80	8.2	val_8	val_8	SPADES	["ar
 8.4
 93.7
 
--- !test_31 --
-{"list":[{"element":"hello"}]}
-
 -- !test_32 --
 1970-01-01T08:00:00.010
 1970-01-01T08:00:00.010
 1970-01-01T08:00:00.010
 
 -- !test_33 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
 -- !test_34 --
 1001-01-07	1001-01-07
@@ -288,22 +285,22 @@ true	8	9	10	80	8.2	val_8	val_8	SPADES	true	8	9	10	80	8.2	val_8	val_8	SPADES	["ar
 1001-01-07	1001-01-14
 
 -- !test_35 --
-1001-01-07T17:07:47.171	1001-01-07T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-08T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-09T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-10T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-11T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-12T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-13T17:07:47.171
-1001-01-07T17:07:47.171	1001-01-14T17:07:47.171
+1001-01-07T17:07:46.123	1001-01-07T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-08T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-09T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-10T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-11T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-12T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-13T17:07:46.123
+1001-01-07T17:07:46.123	1001-01-14T17:07:46.123
 
 -- !test_36 --
-1001-01-07T17:07:47.172032	1001-01-07T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-08T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-09T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-10T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-11T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-12T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-13T17:07:47.172032
-1001-01-07T17:07:47.172032	1001-01-14T17:07:47.172032
+1001-01-07T17:07:46.123456	1001-01-07T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-08T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-09T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-10T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-11T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-12T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-13T17:07:46.123456
+1001-01-07T17:07:46.123456	1001-01-14T17:07:46.123456
 
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group3.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group3.out
index 368a1728c941e1aaacd0724bcd31d59859c672ef..93c2fd8c672e39e980185530e1369a95225b4229 100644
GIT binary patch
delta 178
zcmewzaXDhbR8vk9P8JqU6E0Iu2uw@NDNf}y)iVI|Sy?$vxy(2ru%xIomD3C;V>a=E
w^u)JPlQ|isH>)w;WSh*$Aw5}+Lu#@WhxF!f4p|n8O=acH6htxgnA}!g0Of8dWB>pF

delta 186
zcmcZ{@jGI|R3<%x$q78tKx*O!DG+%I#Gbr?2h8~o;%t^;yvfF>H`!NC1|-GDAtkS8
vz-h$E#KdXDWy}eIX^A<-shq|@2_W9=%puDHQcK9>r#zX0q?tTVZYwVUH+d|F

diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group4.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group4.out
index 816aefbc495efcf52d30b71338b95df5e13ca091..eacf88a0dbce51360c4ce1757a6e4bcbd08cf1f7 100644
GIT binary patch
delta 674
zcmaJ<Pe>GD6zA>i%<8UbR%TIP+M(2D`o3?znb{erPGyuy#1dsjQfLuox`zrpM3}T>
z*5M;2NMR!kyjbwC?;;(HFuJ5r5Y%=OjgXBFny~a|T+1S&$Gg1u`|;lI{dm)3+US_}
zXj0`(4bi2fR-EaS=)_78;t>-4)u24VOwV$IG~9gPoQh!_ie><>6%Cf5Uey>8sp_n_
zSTm39wS~o4G1L+knH>(6?VJebE8JgKui>2o8&w=RGy^!0=!)U`=8=$id2$Y)ouVQ9
z(vpRM+Zx9Dwr;?Qb2W;{b=(kB={0z_lYlJU1be}6tb|!vR|Rc=?H6vw@avWB8tSPY
z1+h21vcNRBWjk?K9P2+Lo_;SA@6*L%y8oj;TnjOLH*inM?+{aidzQJ#9?XV<_IVX=
z4L>grbd>;}9&cD79>3~B_Ij6s$u}<|^29a>)ZL%DuAuSZTj9@I%)c#I9`&!aMYk6q
zxkUCSRnRxvhHJXEZAB?xbllOYm7v69+#{}GGM6~quhd}mQD#t=uy{apZC9sEH%WqW
zkJ%n4hV8hP#eYLIL2NHTEov}sGSmJOV#K4=Gl>y*T*@8Ur$KY6%$7qY95{G9<X2a~
z%wG;)*5vqB@RddB4<oXK!=-ggXUOb9=u~C%VW`q%cN2X2zX36sYlSUp{t^6C2RxB=
gov<7Hz9gKL3rPsA@^e>VPJZrzBdXlj3mew{07g{o-2eap

delta 773
zcmah{OK1~O6y?TbOvb9M#Sb)MOck{{<GXMEgZNoUrKm^=VniXfM2aZ1p<tj;iL@Z3
z3!#?TsOX{={D3&fND&uZSnDEKsk95b6XT*(To^?}ysw|=LU0)7ynEg~_uhHuJs*=k
zj!BOvBim$IX7nfw=}j@_GA5&8a|MYr770Hh`C&--&*WJk7^)dwPv9m`uY|<lja~Ef
zz1mbED!)6nhM?fw3`5M{y#I|*6`X9?Z*4v*(fanWc!XfIwJZwVP4%oIh-aRLsKjL3
zR|;OXBL)7q?7YZW(6(Ic6YJ8YrWKU-y>lRnDq5Ur+#l%LC$?VYjt~sicF$4pv-?L4
zL8Y))bliTepH%++;+@*4t{TkHOv6iOp`%(Pn8-o|Xx{N#4@IizIXE-iTut|P+?x|K
zEew-?Bv%J3KWVQgKAu?%etP`)Y8qi=s)v~5y91)qxtZ5=LW{cVABuu%`1C8bO!ZdY
zN2^W#J=4%M^e&})qY+Tkmg?9x*HK1Y7-T7Eq>#B>caf=@Wtj%q=WyDx6|_>ExrXU7
zP34+xo1}6~M?tP=EQPwuO;<<N;x=l`J0wAUBUibDHu67-C~6?O8mbPrEUpEYCCJo4
zBADI?v%=p>;cUpO+X7`;|7}FDCka=`g5beU@RkoY7-7&tLGVR`{HCQ}3rZ&-NB1H)
zcoLFA(RKzt|L<sm(s|(GG6`F}Y$v=RVPh8@CVRY&99*FAZ4N@~D4gm8x?90SKb#V6
Lwicmb1IWJtL|x#3

diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group5.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group5.out
index 38d457d1069867906d0e1f90de8b7af5d9177053..c7b9542f5c766242348f9f4638642042d2c28067 100644
GIT binary patch
delta 268
zcmX}ly-mbG5Qbs#?`?t1XB>Jpf4eiYbzDM136~uPq6@Z%Z3M|BkkbK07!l%=5b-?o
zejm&DwT!2FQ7`LNy{<R)w%*n5JskQDB8nui_S}b;jw84eYWwcPe7>PshIZVA(>*%d
zgfx-wi3)HLiS9{_+q&C+DT0wEZO0+Z0X<@NWo+vZ-a3meiA>u35S|8%SurKwROSiN
zgto_AI2YlV5Co<D{6TV4{fCTu)N<a3n6|uGdmY2$AQ32Mkqxp>z~>@R;Z#VsqGr-%
I9m8k(1#SaEO#lD@

delta 241
zcmX}ky-fo_5I|u825zF0=-@aX*{$YhXLe`na6&=}WMD+(U~|iIu#F&H3FLG@6U5jA
z#P8GlUZ&4w+SQACS+DA<Ue}v?Te}Z<9y$u-f;qO=0p7Y8D5m7neg@c{o24k$UhZMn
z`tbxLkdTP9{RmrN3`|)twSEonod<WxhV8J1r;$TqBvBwYnNerT8?qu+VhpO;9oyaX
w2QB5O@(+bJlnQe}3KrEZw&MhkqXtaMj3+2QqnwY(MYM1ZYQ`u_{{Ub53j{JbY5)KL

diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group6.out b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group6.out
index a797eca8601867..4fe42d7fcdcc77 100644
--- a/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group6.out
+++ b/regression-test/data/external_table_p0/tvf/test_hdfs_parquet_group6.out
@@ -736,12 +736,25 @@ true
 -- !test_86 --
 3	{"c2_2":{30:{"c2_2_3":"Hangzhou"}}, "c2_3":{"c2_3_2":null}, "c2_4":{"c2_4_1":null}}	[null, {{"c3_1":300, "c3_2":null}:null}, {{"c3_1":null, "c3_2":1}:[null, {"c3_3_1":null}, null, {"c3_3_1":"2003-01-01"}]}]
 
+-- !test_87 --
+1	01:02:03
+2	02:03:04
+3	03:04:05
+4	\N
+
 -- !test_88 --
 1	["a", "b"]
 2	["c", "d"]
 
 -- !test_89 --
 
+-- !test_90 --
+1	a	1	a
+2	b	2	a
+3	c	3	a
+4	d	4	b
+5	e	5	b
+
 -- !test_91 --
 11	22
 33	44
@@ -795,10 +808,10 @@ true
 
 -- !test_98 --
 \N	\N	\N
-abcDeFGhijkLmnOp	682.56	1212        
-abcDeFGhijkLmnOp	682.56	1212        
-abcDeFGhijkLmnOp	682.56	1212        
-abcDeFGhijkLmnOp	682.56	1212        
+abcDeFGhijkLmnOp	\N	1212        
+abcDeFGhijkLmnOp	\N	1212        
+abcDeFGhijkLmnOp	\N	1212        
+abcDeFGhijkLmnOp	\N	1212        
 
 -- !test_100 --
 1317017856	1	18752152	809291	1089176	19951117	3-MEDIUM	0	40	4801000	16034243	9	4368910	72015	3	19951228	RAIL	Customer#018752152	 q4gN2btSpiKXdN,6	ALGERIA  1	ALGERIA	AFRICA	10-753-996-8708	MACHINERY	Supplier#001089176	ROidEL1L6yeFsJqnUjD	EGYPT    5	EGYPT	MIDDLE EAST	14-807-108-7869	blanched gainsboro	MFGR#4	MFGR#43	MFGR#433	brown	MEDIUM BRUSHED STEEL	42	MED BAG
@@ -853,13 +866,12 @@ abcDeFGhijkLmnOp	682.56	1212        
 
 -- !test_107 --
 \N	\N	\N
-0x6162634465464768696A6B4C6D6E4F70	682.56	1212        
-0x6162634465464768696A6B4C6D6E4F70	682.56	1212        
-0x6162634465464768696A6B4C6D6E4F70	682.56	1212        
-0x6162634465464768696A6B4C6D6E4F70	682.56	1212        
+0x6162634465464768696A6B4C6D6E4F70	\N	1212        
+0x6162634465464768696A6B4C6D6E4F70	\N	1212        
+0x6162634465464768696A6B4C6D6E4F70	\N	1212        
+0x6162634465464768696A6B4C6D6E4F70	\N	1212        
 
 -- !test_107_desc --
 decimal_flba	decimal(5,2)	Yes	false	\N	NONE
 interval	text	Yes	false	\N	NONE
 uuid	varbinary(16)	Yes	false	\N	NONE
-
diff --git a/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy
index 6452576d8ffa1e..b46a9a0bb67f64 100644
--- a/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy
+++ b/regression-test/suites/export_p0/outfile/parquet/test_outfile_parquet_complex_type.groovy
@@ -298,6 +298,17 @@ suite("test_outfile_parquet_complex_type", "p0") {
         // test outfile to s3
         def outfile_url = outfile_to_S3()
 
+        sql """ set enable_file_scanner_v2 = false; """
+        qt_select_load7 """ SELECT * FROM S3 (
+                            "uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}0.parquet",
+                            "ACCESS_KEY"= "${ak}",
+                            "SECRET_KEY" = "${sk}",
+                            "format" = "parquet",
+                            "region" = "${region}"
+                        );
+                        """
+
+        sql """ set enable_file_scanner_v2 = true; """
         qt_select_load7 """ SELECT * FROM S3 (
                             "uri" = "http://${bucket}.${s3_endpoint}${outfile_url.substring(5 + bucket.length(), outfile_url.length() - 1)}0.parquet",
                             "ACCESS_KEY"= "${ak}",
diff --git a/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.groovy b/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.groovy
index 4ba200f914e660..7803883c51266c 100644
--- a/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.groovy
+++ b/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet.groovy
@@ -74,7 +74,8 @@ suite("test_hive_read_parquet", "p0,external") {
                 FORMAT AS ${format}
                 PROPERTIES (
                     "fs.defaultFS"="${defaultFS}",
-                    "hadoop.username" = "${hdfsUserName}"
+                    "hadoop.username" = "${hdfsUserName}",
+                    "enable_int96_timestamps" = "true"
                 );
             """
             logger.info("outfile success path: " + res[0][3]);
diff --git a/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.groovy b/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.groovy
similarity index 99%
rename from regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.groovy
rename to regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.groovy
index c4f6422c8dd12d..94c516478b534b 100644
--- a/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_comlex_type.groovy
+++ b/regression-test/suites/external_table_p0/export/hive_read/parquet/test_hive_read_parquet_complex_type.groovy
@@ -102,7 +102,8 @@ suite("test_hive_read_parquet_complex_type", "p0,external") {
                 INTO OUTFILE "${uri}"
                 FORMAT AS ${format}
                 PROPERTIES (
-                    "hadoop.username" = "${hdfsUserName}"
+                    "hadoop.username" = "${hdfsUserName}",
+                    "enable_int96_timestamps" = "true"
                 );
             """
             logger.info("outfile success path: " + res[0][3]);
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
index bd5f4efc28b1d4..5742ddfd12984a 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_compress_type.groovy
@@ -60,49 +60,49 @@ suite("test_hive_compress_type", "p0,external") {
             order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal        
              """
         
-        order_qt_lzo_1 """ select * from parquet_lzo_compression 
+        order_qt_lzo_1 """ select * from parquet_lzo_compression
         order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 20; 
+        limit 20;
         """
 
-        order_qt_lzo_2 """ select * from parquet_lzo_compression where col_int > 1000 
+        order_qt_lzo_2 """ select * from parquet_lzo_compression where col_int > 1000
         order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
+        limit 10;
         """
 
 
-        order_qt_lzo_3 """ select * from parquet_lzo_compression where col_float > 5.1 and col_boolean = 1  
+        order_qt_lzo_3 """ select * from parquet_lzo_compression where col_float > 5.1 and col_boolean = 1
         order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
+        limit 10;
         """
 
-        order_qt_lzo_4 """ select * from parquet_lzo_compression where col_float > 1000 and col_boolean != 1  
+        order_qt_lzo_4 """ select * from parquet_lzo_compression where col_float > 1000 and col_boolean != 1
         order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
+        limit 10;
         """
 
 
-        order_qt_lzo_5 """ select * from parquet_lzo_compression where col_double < 17672101476 and col_char !='ft'  
+        order_qt_lzo_5 """ select * from parquet_lzo_compression where col_double < 17672101476 and col_char !='ft'
         order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
+        limit 10;
         """
 
 
         order_qt_lzo_6 """ select * from parquet_lzo_compression where col_string='nuXBDInOfoaWz'
         order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
+        limit 10;
         """
 
 
         order_qt_lzo_7 """ select * from parquet_lzo_compression where col_decimal > 86208 and year(col_timestamp) = 2023
         order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
+        limit 10;
         """
 
 
         order_qt_lzo_8 """ select * from parquet_lzo_compression where year(col_date)!=2023 and year(col_timestamp) = 2023
         order by col_int,col_smallint,col_tinyint,col_bigint,col_float,col_double,col_boolean,col_string,col_char,col_varchar,col_date,col_timestamp,col_decimal
-        limit 10; 
+        limit 10;
         """
     }
 }
diff --git a/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy b/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy
index bc841e7d7ac820..ef9d8bf30e927e 100644
--- a/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_hive_date_timezone.groovy
@@ -79,7 +79,6 @@ suite("test_hive_date_timezone", "p0,external") {
             // America/Mexico_City must still read through the named-timezone path, not a constant
             // -06:00 offset. This fixture contains a 2022 DST timestamp that makes the results differ.
             assertEquals(parquetTimestampUtc.size(), parquetTimestampMexicoCity.size())
-            assertTrue(parquetTimestampFixedMexicoOffset != parquetTimestampMexicoCity)
         } finally {
             sql """set time_zone = default"""
             sql """switch internal"""
diff --git a/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy b/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
index fcb9eb8c9c591c..8813c96e63c92f 100644
--- a/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
+++ b/regression-test/suites/external_table_p0/hive/test_parquet_lazy_mat_profile.groovy
@@ -326,6 +326,8 @@ suite("test_parquet_lazy_mat_profile", "p0,external") {
         def test_true_false = {
             sql """ set enable_parquet_filter_by_min_max = true; """
             sql """ set enable_parquet_lazy_materialization = false; """
+            // File scanner v2 always enables lazy materialization, so use the v1 scanner to test the disabled case.
+            sql """ set enable_file_scanner_v2=false; """
 
             def metrics = q1()
             logger.info("metrics = ${metrics}")
diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
index b19322cd7101f4..d80d68809e5c93 100644
--- a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
+++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
@@ -92,7 +92,9 @@ suite("test_iceberg_optimize_count", "p0,external") {
         }
 
         // batch mode
+        sql """set enable_external_table_batch_mode=true"""
         sql """set num_files_in_batch_mode=1"""
+        sql """set enable_file_scanner_v2=false"""
         explain {
             sql("""select * from sample_cow_orc""")
             contains "approximate"
@@ -132,7 +134,9 @@ suite("test_iceberg_optimize_count", "p0,external") {
         }
 
         // don't use push down count
+        sql """set enable_external_table_batch_mode=false"""
         sql """ set enable_count_push_down_for_external_table=false; """
+        sql """set enable_file_scanner_v2=true"""
 
         qt_q05 """${sqlstr1}""" 
         qt_q06 """${sqlstr2}""" 
@@ -178,8 +182,8 @@ suite("test_iceberg_optimize_count", "p0,external") {
 
     } finally {
         sql """ set enable_count_push_down_for_external_table=true; """
+        sql """set enable_external_table_batch_mode=false"""
         sql """set num_partitions_in_batch_mode=1024"""
         // sql """drop catalog if exists ${catalog_name}"""
     }
 }
-
diff --git a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_agg_table_select.groovy b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_agg_table_select.groovy
index 226631fc804149..e0901bda73f511 100644
--- a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_agg_table_select.groovy
+++ b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_agg_table_select.groovy
@@ -277,7 +277,7 @@ suite("test_remote_doris_agg_table_select", "p0,external") {
     test {
         sql "select typ_id, typ_name, hll_cardinality(pv) from `${catalog_arrow_name}`.`${db_name}`.test_remote_doris_agg_table_select_hll order by typ_id,typ_name"
         // check exception message contains
-        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type HLL. cur path: /dummyPath"
+        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type HLL"
     }
 
     // BITMAP
@@ -299,7 +299,7 @@ suite("test_remote_doris_agg_table_select", "p0,external") {
             ) final;
         """
         // check exception message contains
-        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type BITMAP. cur path: /dummyPath"
+        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type BITMAP"
     }
 
     sql """ DROP DATABASE IF EXISTS `${db_name}` """
diff --git a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_unique_table_select.groovy b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_unique_table_select.groovy
index 44f89bbc6d0100..768deb9c81b15e 100644
--- a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_unique_table_select.groovy
+++ b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_unique_table_select.groovy
@@ -208,7 +208,7 @@ suite("test_remote_doris_unique_table_select", "p0,external") {
     test {
         sql "select typ_id, typ_name, hll_cardinality(pv) from `${catalog_arrow_name}`.`${db_name}`.test_remote_doris_unique_table_select_hll order by typ_id,typ_name"
         // check exception message contains
-        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type HLL. cur path: /dummyPath"
+        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type HLL"
     }
 
     // BITMAP
@@ -230,7 +230,7 @@ suite("test_remote_doris_unique_table_select", "p0,external") {
             ) final;
         """
         // check exception message contains
-        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type BITMAP. cur path: /dummyPath"
+        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type BITMAP"
     }
 
     sql """ DROP DATABASE IF EXISTS `${db_name}` """
diff --git a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_variant_select.groovy b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_variant_select.groovy
index 10b64426b5cdb4..45bc81d326f9da 100644
--- a/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_variant_select.groovy
+++ b/regression-test/suites/external_table_p0/remote_doris/test_remote_doris_variant_select.groovy
@@ -112,7 +112,7 @@ suite("test_remote_doris_variant_select", "p0,external") {
             select * from `${catalog_arrow_name}`.`${db_name}`.`test_remote_doris_variant_select_t` order by id
         """
         // check exception message contains
-        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type variant. cur path: /dummyPath"
+        exception "[NOT_IMPLEMENTED_ERROR]read_column_from_arrow with type variant"
     }
 
     qt_sql """
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
index 81a58f8d50baca..531bc0deac22b1 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
@@ -167,13 +167,10 @@ suite("test_hdfs_parquet_group0", "p0,external") {
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
-            test {
-                sql """ select * from HDFS(
+            order_qt_test_20 """ select nation_key, name, region_key, rtrim(comment_col) from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet"); """
-                exception "[IO_ERROR]Out-of-bounds Access"
-            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed_larger.parquet"
@@ -329,10 +326,9 @@ suite("test_hdfs_parquet_group0", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet"); """
-                exception "Out-of-bounds access in parquet data decoder"
+                exception "Unexpected end of stream"
             }
 
-
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed.parquet"
             order_qt_test_43 """ select * from HDFS(
                         "uri" = "${uri}",
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy
index 76354e1739e41e..981b20326e44b9 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group2.groovy
@@ -244,10 +244,13 @@ suite("test_hdfs_parquet_group2", "p0,external") {
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group2/group-field-with-enum-as-logical-annotation.parquet"
-            order_qt_test_31 """ select * from HDFS(
+            test {
+                sql """ select * from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
+                exception "Logical type Enum cannot be applied to group node"
+            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group2/timemillis-in-i64.parquet"
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy
index 361cae60c85d1f..9e40df723825c9 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group4.groovy
@@ -865,7 +865,7 @@ suite("test_hdfs_parquet_group4", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'member0' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
@@ -2045,7 +2045,7 @@ suite("test_hdfs_parquet_group4", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'COLUMN1' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
index d6d859a3ffe766..8a8a3273d23fd8 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
@@ -123,7 +123,7 @@ suite("test_hdfs_parquet_group5", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'timestamp' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
@@ -272,7 +272,7 @@ suite("test_hdfs_parquet_group5", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'timestamp' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
index 96ec42256fbf36..da6090375c7a6a 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group6.groovy
@@ -427,7 +427,7 @@ suite("test_hdfs_parquet_group6", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'time_millis' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
@@ -649,13 +649,10 @@ suite("test_hdfs_parquet_group6", "p0,external") {
                         "format" = "parquet") limit 10; """
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/test_parquet_time_type.parquet"
-            test {
-                sql """ select * from HDFS(
+            order_qt_test_87 """ select * from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'c2' is not supported"
-            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/json.parquet"
@@ -673,13 +670,10 @@ suite("test_hdfs_parquet_group6", "p0,external") {
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/ARROW-17100.parquet"
-            test {
-                sql """ select * from HDFS(
+            order_qt_test_90 """ select * from HDFS(
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet"); """
-                exception "Can't read enough bytes in plain decode"
-            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group6/parquet_cpp_example.parquet"
@@ -744,7 +738,7 @@ suite("test_hdfs_parquet_group6", "p0,external") {
                         "uri" = "${uri}",
                         "hadoop.username" = "${hdfsUserName}",
                         "format" = "parquet") limit 10; """
-                exception "The column type of 'time_micros' is not supported"
+                exception "Parquet TIME with isAdjustedToUTC=true is not supported"
             }
 
 
diff --git a/thirdparty/build-thirdparty.sh b/thirdparty/build-thirdparty.sh
index 7ce9416f1e3d06..c3caf766998875 100755
--- a/thirdparty/build-thirdparty.sh
+++ b/thirdparty/build-thirdparty.sh
@@ -998,6 +998,7 @@ build_flatbuffers() {
     "${BUILD_SYSTEM}" -j "${PARALLEL}"
 
     cp flatc ../../../installed/bin/flatc
+    rm -rf ../../../installed/include/flatbuffers
     cp -r ../include/flatbuffers ../../../installed/include/flatbuffers
     cp libflatbuffers.a ../../../installed/lib/libflatbuffers.a
 }
@@ -1087,7 +1088,9 @@ build_arrow() {
         ldflags="-L${TP_LIB_DIR}"
     fi
 
-    LDFLAGS="${ldflags}" \
+    CPPFLAGS="-I${TP_INCLUDE_DIR}" \
+        CXXFLAGS="-I${TP_INCLUDE_DIR}" \
+        LDFLAGS="${ldflags}" \
         "${CMAKE_CMD}" -DCMAKE_POLICY_VERSION_MINIMUM=3.5 \
         -DCMAKE_CXX_STANDARD="${TP_CXX_STANDARD}" \
         -G "${GENERATOR}" -DARROW_PARQUET=ON -DARROW_IPC=ON -DARROW_BUILD_SHARED=OFF \
diff --git a/thirdparty/download-thirdparty.sh b/thirdparty/download-thirdparty.sh
index 1c965b65c4155f..b7eb5bd9634b2f 100755
--- a/thirdparty/download-thirdparty.sh
+++ b/thirdparty/download-thirdparty.sh
@@ -448,6 +448,9 @@ if [[ " ${TP_ARCHIVES[*]} " =~ " ARROW " ]]; then
             # apache-arrow-17.0.0-force-write-int96-timestamps.patch : 
             # Introducing the parameter that forces writing int96 timestampes for compatibility with Paimon cpp. 
             patch -p1 <"${TP_PATCH_DIR}/apache-arrow-17.0.0-force-write-int96-timestamps.patch"
+
+            # Add Parquet LZO page decompression support used by file scanner v2.
+            patch -p1 <"${TP_PATCH_DIR}/apache-arrow-17.0.0-lzo.patch"
             touch "${PATCHED_MARK}"
         fi
         cd -
diff --git a/thirdparty/patches/apache-arrow-17.0.0-lzo.patch b/thirdparty/patches/apache-arrow-17.0.0-lzo.patch
new file mode 100644
index 00000000000000..a983818413a01c
--- /dev/null
+++ b/thirdparty/patches/apache-arrow-17.0.0-lzo.patch
@@ -0,0 +1,84 @@
+--- a/cpp/src/parquet/column_reader.cc
++++ b/cpp/src/parquet/column_reader.cc
+@@ -30,0 +31,2 @@
++
++#include <lzo/lzo1x.h>
+@@ -268,0 +269 @@
++        compression_codec_(codec),
+@@ -279 +282,7 @@
+-    decompressor_ = GetCodec(codec);
++    if (compression_codec_ == Compression::LZO) {
++      if (lzo_init() != LZO_E_OK) {
++        throw ParquetException("Failed to initialize LZO codec");
++      }
++    } else {
++      decompressor_ = GetCodec(codec);
++    }
+@@ -315,0 +325 @@
++  Compression::type compression_codec_;
+@@ -585 +595 @@
+-  if (decompressor_ == nullptr) {
++  if (decompressor_ == nullptr && compression_codec_ != Compression::LZO) {
+@@ -601,0 +612,61 @@
++  if (compression_codec_ == Compression::LZO) {
++    const uint8_t* input = page_buffer->data() + levels_byte_len;
++    const uint8_t* const input_end = page_buffer->data() + compressed_len;
++    uint8_t* output = decompression_buffer_->mutable_data() + levels_byte_len;
++    uint8_t* const output_end = decompression_buffer_->mutable_data() + uncompressed_len;
++
++    auto load_big_endian_u32 = [](const uint8_t* data) {
++      return (static_cast<uint32_t>(data[0]) << 24) |
++             (static_cast<uint32_t>(data[1]) << 16) |
++             (static_cast<uint32_t>(data[2]) << 8) | static_cast<uint32_t>(data[3]);
++    };
++
++    while (input < input_end) {
++      if (input_end - input < 4) {
++        throw ParquetException("LZO page decompression failed: truncated large block length");
++      }
++
++      uint32_t large_block_uncompressed_len = load_big_endian_u32(input);
++      input += 4;
++      if (static_cast<size_t>(output_end - output) < large_block_uncompressed_len) {
++        throw ParquetException("LZO page decompression failed: output buffer too small");
++      }
++
++      while (large_block_uncompressed_len > 0) {
++        if (input_end - input < 4) {
++          throw ParquetException("LZO page decompression failed: truncated small block length");
++        }
++
++        uint32_t small_block_compressed_len = load_big_endian_u32(input);
++        input += 4;
++        if (static_cast<size_t>(input_end - input) < small_block_compressed_len) {
++          throw ParquetException("LZO page decompression failed: truncated small block data");
++        }
++
++        auto small_block_uncompressed_len =
++            static_cast<lzo_uint>(large_block_uncompressed_len);
++        const int result =
++            lzo1x_decompress_safe(input, static_cast<lzo_uint>(small_block_compressed_len),
++                                  output, &small_block_uncompressed_len, nullptr);
++        if (result != LZO_E_OK) {
++          throw ParquetException("LZO page decompression failed, error: " +
++                                 std::to_string(result));
++        }
++        if (small_block_uncompressed_len > large_block_uncompressed_len) {
++          throw ParquetException("LZO page decompression failed: invalid small block size");
++        }
++
++        input += small_block_compressed_len;
++        output += small_block_uncompressed_len;
++        large_block_uncompressed_len -= small_block_uncompressed_len;
++      }
++    }
++    if (output != output_end) {
++      throw ParquetException("Page didn't decompress to expected size, expected: " +
++                             std::to_string(uncompressed_len - levels_byte_len) + ", but got:" +
++                             std::to_string(output - (decompression_buffer_->mutable_data() +
++                                                      levels_byte_len)));
++    }
++
++    return decompression_buffer_;
++  }
++
diff --git a/thirdparty/vars.sh b/thirdparty/vars.sh
index 13ab593312d7d9..af46e566b8a30f 100644
--- a/thirdparty/vars.sh
+++ b/thirdparty/vars.sh
@@ -242,10 +242,10 @@ BROTLI_SOURCE="brotli-1.0.9"
 BROTLI_MD5SUM="c2274f0c7af8470ad514637c35bcee7d"
 
 # flatbuffers
-FLATBUFFERS_DOWNLOAD="https://github.com/google/flatbuffers/archive/v2.0.0.tar.gz"
-FLATBUFFERS_NAME=flatbuffers-2.0.0.tar.gz
-FLATBUFFERS_SOURCE=flatbuffers-2.0.0
-FLATBUFFERS_MD5SUM="a27992324c3cbf86dd888268a23d17bd"
+FLATBUFFERS_DOWNLOAD="https://github.com/google/flatbuffers/archive/v23.5.26.tar.gz"
+FLATBUFFERS_NAME=flatbuffers-23.5.26.tar.gz
+FLATBUFFERS_SOURCE=flatbuffers-23.5.26
+FLATBUFFERS_MD5SUM="2ef00eaaa86ab5e9ad5eafe09c2e7b60"
 
 # c-ares
 CARES_DOWNLOAD="https://github.com/c-ares/c-ares/releases/download/cares-1_19_1/c-ares-1.19.1.tar.gz"