From 718fd06fed288b9fe4a41434d08388578ebeb46b Mon Sep 17 00:00:00 2001 From: dantengsky Date: Fri, 14 Nov 2025 19:57:27 +0800 Subject: [PATCH 1/5] refactor: bump arrow*/parquet to 56 - upgrade arrow* + parquet crates to v56 - patch iceberg to use arrow 56 as well https://github.com/databendlabs/iceberg-rust/pull/4 - patch orc-rust to use arrow 56 as well https://github.com/datafuse-extras/orc-rust/pull/1 - patch arrow-udf-runtime to use arrow 56 as well https://github.com/datafuse-extras/arrow-udf/pull/1 - bump arrow* from 55 to 56 - bump tonic from 0.12 to 0.13 - bump pyo3 from 0.24.1 to 0.25 - bump pyo3-build-config from 0.24 to 0.25 - bump pyo3-build-config from 0.24.2 to 0.25 - bump pyo3 from 0.24 to 0.25 --- Cargo.lock | 827 +++++++++++++----- Cargo.toml | 46 +- src/bendpy/Cargo.toml | 4 +- src/common/cloud_control/Cargo.toml | 1 + .../src/servers/flight/flight_service.rs | 6 +- .../servers/flight_sql/flight_sql_server.rs | 6 +- .../tests/it/parquet_rs/prune_row_groups.rs | 6 +- src/query/storages/delta/Cargo.toml | 1 + .../storages/delta/src/arrow56_conversion.rs | 321 +++++++ src/query/storages/delta/src/lib.rs | 2 + src/query/storages/delta/src/table.rs | 3 +- .../src/parquet_reader/reader/full_reader.rs | 13 +- .../storages/parquet/src/statistics/column.rs | 63 +- .../storages/parquet/src/statistics/page.rs | 4 +- 14 files changed, 1013 insertions(+), 290 deletions(-) create mode 100644 src/query/storages/delta/src/arrow56_conversion.rs diff --git a/Cargo.lock b/Cargo.lock index 74a6ab9b0edc7..f400d6011762e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -314,20 +314,41 @@ version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1bb018b6960c87fd9d025009820406f74e83281185a8bdcb44880d2aa5c9a87" dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", - "pyo3", + "arrow-arith 55.1.0", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-cast 55.1.0", + "arrow-csv 55.1.0", + "arrow-data 55.1.0", + "arrow-ipc 55.1.0", + "arrow-json 55.1.0", + "arrow-ord 55.1.0", + "arrow-row 55.1.0", + "arrow-schema 55.1.0", + "arrow-select 55.1.0", + "arrow-string 55.1.0", +] + +[[package]] +name = "arrow" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +dependencies = [ + "arrow-arith 56.2.0", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-csv 56.2.0", + "arrow-data 56.2.0", + "arrow-ipc 56.2.0", + "arrow-json 56.2.0", + "arrow-ord 56.2.0", + "arrow-pyarrow", + "arrow-row 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "arrow-string 56.2.0", ] [[package]] @@ -336,10 +357,24 @@ version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44de76b51473aa888ecd6ad93ceb262fb8d40d1f1154a4df2f069b3590aa7575" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", + "chrono", + "num", +] + +[[package]] +name = "arrow-arith" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "chrono", "num", ] @@ -351,9 +386,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29ed77e22744475a9a53d00026cf8e166fe73cf42d89c4c4ae63607ee1cfcc3f" dependencies = [ "ahash 0.8.12", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", "chrono", "chrono-tz 0.10.3", "half", @@ -361,6 +396,23 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-array" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +dependencies = [ + "ahash 0.8.12", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "chrono", + "chrono-tz 0.10.3", + "half", + "hashbrown 0.16.0", + "num", +] + [[package]] name = "arrow-buffer" version = "55.1.0" @@ -372,17 +424,49 @@ dependencies = [ "num", ] +[[package]] +name = "arrow-buffer" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +dependencies = [ + "bytes", + "half", + "num", +] + [[package]] name = "arrow-cast" version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f39e1d774ece9292697fcbe06b5584401b26bd34be1bec25c33edae65c2420ff" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", + "arrow-select 55.1.0", + "atoi", + "base64 0.22.1", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", + "ryu", +] + +[[package]] +name = "arrow-cast" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", "atoi", "base64 0.22.1", "chrono", @@ -399,9 +483,9 @@ version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9055c972a07bf12c2a827debfd34f88d3b93da1941d36e1d9fee85eebe38a12a" dependencies = [ - "arrow-array", - "arrow-cast", - "arrow-schema", + "arrow-array 55.1.0", + "arrow-cast 55.1.0", + "arrow-schema 55.1.0", "chrono", "csv", "csv-core", @@ -409,35 +493,62 @@ dependencies = [ "regex", ] +[[package]] +name = "arrow-csv" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +dependencies = [ + "arrow-array 56.2.0", + "arrow-cast 56.2.0", + "arrow-schema 56.2.0", + "chrono", + "csv", + "csv-core", + "regex", +] + [[package]] name = "arrow-data" version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf75ac27a08c7f48b88e5c923f267e980f27070147ab74615ad85b5c5f90473d" dependencies = [ - "arrow-buffer", - "arrow-schema", + "arrow-buffer 55.1.0", + "arrow-schema 55.1.0", "half", "num", ] [[package]] -name = "arrow-flight" -version = "55.1.0" +name = "arrow-data" +version = "56.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91efc67a4f5a438833dd76ef674745c80f6f6b9a428a3b440cbfbf74e32867e6" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", +checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +dependencies = [ + "arrow-buffer 56.2.0", + "arrow-schema 56.2.0", + "half", + "num", +] + +[[package]] +name = "arrow-flight" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c8b0ba0784d56bc6266b79f5de7a24b47024e7b3a0045d2ad4df3d9b686099f" +dependencies = [ + "arrow-arith 56.2.0", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-data 56.2.0", + "arrow-ipc 56.2.0", + "arrow-ord 56.2.0", + "arrow-row 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "arrow-string 56.2.0", "base64 0.22.1", "bytes", "futures", @@ -445,7 +556,7 @@ dependencies = [ "paste", "prost", "prost-types", - "tonic", + "tonic 0.13.1", ] [[package]] @@ -454,10 +565,24 @@ version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a222f0d93772bd058d1268f4c28ea421a603d66f7979479048c429292fac7b2e" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", + "flatbuffers", +] + +[[package]] +name = "arrow-ipc" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", "flatbuffers", "lz4_flex", "zstd 0.13.3", @@ -469,11 +594,33 @@ version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9085342bbca0f75e8cb70513c0807cc7351f1fbf5cb98192a67d5e3044acb033" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-cast 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", + "chrono", + "half", + "indexmap 2.9.0", + "lexical-core", + "memchr", + "num", + "serde", + "serde_json", + "simdutf8", +] + +[[package]] +name = "arrow-json" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "chrono", "half", "indexmap 2.9.0", @@ -491,11 +638,36 @@ version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab2f1065a5cad7b9efa9e22ce5747ce826aa3855766755d4904535123ef431e7" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", + "arrow-select 55.1.0", +] + +[[package]] +name = "arrow-ord" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", +] + +[[package]] +name = "arrow-pyarrow" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" +dependencies = [ + "arrow-array 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "pyo3", ] [[package]] @@ -504,10 +676,23 @@ version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3703a0e3e92d23c3f756df73d2dc9476873f873a76ae63ef9d3de17fda83b2d8" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", + "half", +] + +[[package]] +name = "arrow-row" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "half", ] @@ -521,6 +706,16 @@ dependencies = [ "serde", ] +[[package]] +name = "arrow-schema" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +dependencies = [ + "bitflags 2.9.0", + "serde", +] + [[package]] name = "arrow-select" version = "55.1.0" @@ -528,10 +723,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24b7b85575702b23b85272b01bc1c25a01c9b9852305e5d0078c79ba25d995d4" dependencies = [ "ahash 0.8.12", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", + "num", +] + +[[package]] +name = "arrow-select" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +dependencies = [ + "ahash 0.8.12", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "num", ] @@ -541,11 +750,28 @@ version = "55.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9260fddf1cdf2799ace2b4c2fc0356a9789fa7551e0953e35435536fecefebbd" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-data 55.1.0", + "arrow-schema 55.1.0", + "arrow-select 55.1.0", + "memchr", + "num", + "regex", + "regex-syntax 0.8.5", +] + +[[package]] +name = "arrow-string" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +dependencies = [ + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", "memchr", "num", "regex", @@ -555,14 +781,14 @@ dependencies = [ [[package]] name = "arrow-udf-runtime" version = "0.8.0" -source = "git+https://github.com/datafuse-extras/arrow-udf.git?rev=a442343#a44234332e9c182c247a510c3721b655572f323c" +source = "git+https://github.com/datafuse-extras/arrow-udf.git?rev=2480dccf1#2480dccf1bad1a88d39a7c084ed7d54685e93735" dependencies = [ "anyhow", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-ipc 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", "async-trait", "atomic-time", "base64 0.22.1", @@ -1207,14 +1433,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ "async-trait", - "axum-core", + "axum-core 0.4.5", "bytes", "futures-util", "http 1.3.1", "http-body 1.0.1", "http-body-util", "itoa", - "matchit", + "matchit 0.7.3", "memchr", "mime", "percent-encoding", @@ -1227,6 +1453,31 @@ dependencies = [ "tower-service", ] +[[package]] +name = "axum" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b098575ebe77cb6d14fc7f32749631a6e44edbef6b796f89b020e99ba20d425" +dependencies = [ + "axum-core 0.5.5", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "itoa", + "matchit 0.8.4", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "serde_core", + "sync_wrapper", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + [[package]] name = "axum-core" version = "0.4.5" @@ -1247,6 +1498,24 @@ dependencies = [ "tower-service", ] +[[package]] +name = "axum-core" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" +dependencies = [ + "bytes", + "futures-core", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "sync_wrapper", + "tower-layer", + "tower-service", +] + [[package]] name = "backoff" version = "0.4.0" @@ -1331,8 +1600,8 @@ checksum = "3a8241f3ebb85c056b509d4327ad0358fbbba6ffb340bf388f26350aeda225b1" name = "bendpy" version = "0.1.0" dependencies = [ - "arrow", - "arrow-schema", + "arrow 56.2.0", + "arrow-schema 56.2.0", "ctor", "databend-common-base", "databend-common-catalog", @@ -2335,12 +2604,14 @@ dependencies = [ [[package]] name = "comfy-table" -version = "7.1.4" +version = "7.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a" +checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" dependencies = [ - "crossterm", - "unicode-segmentation", + "crossterm 0.27.0", + "crossterm 0.28.1", + "strum 0.26.3", + "strum_macros 0.26.4", "unicode-width 0.2.0", ] @@ -2751,6 +3022,19 @@ version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crossterm" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f476fe445d41c9e991fd07515a6f463074b782242ccf4a5b7b1d1012e70824df" +dependencies = [ + "bitflags 2.9.0", + "crossterm_winapi", + "libc", + "parking_lot 0.12.3", + "winapi", +] + [[package]] name = "crossterm" version = "0.28.1" @@ -2758,10 +3042,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" dependencies = [ "bitflags 2.9.0", - "crossterm_winapi", "parking_lot 0.12.3", "rustix 0.38.44", - "winapi", ] [[package]] @@ -3158,7 +3440,7 @@ dependencies = [ name = "databend-common-catalog" version = "0.1.0" dependencies = [ - "arrow-schema", + "arrow-schema 56.2.0", "async-backtrace", "async-trait", "chrono", @@ -3184,7 +3466,7 @@ dependencies = [ "log", "maplit", "parking_lot 0.12.3", - "parquet", + "parquet 56.2.0", "rand 0.8.5", "roaring", "serde", @@ -3209,7 +3491,7 @@ dependencies = [ "prost-build", "semver", "serde", - "tonic", + "tonic 0.13.1", "tonic-build", "tower 0.5.2", ] @@ -3218,9 +3500,9 @@ dependencies = [ name = "databend-common-column" version = "0.1.0" dependencies = [ - "arrow-buffer", - "arrow-data", - "arrow-schema", + "arrow-buffer 56.2.0", + "arrow-data 56.2.0", + "arrow-schema 56.2.0", "borsh", "bytemuck", "databend-common-base", @@ -3296,7 +3578,7 @@ version = "0.1.0" dependencies = [ "anyhow", "arrow-flight", - "arrow-schema", + "arrow-schema 56.2.0", "backtrace", "bincode 2.0.1", "cidr", @@ -3310,7 +3592,7 @@ dependencies = [ "object", "once_cell", "opendal", - "parquet", + "parquet 56.2.0", "paste", "prost", "redis", @@ -3321,22 +3603,22 @@ dependencies = [ "sqlx", "tantivy", "thiserror 1.0.69", - "tonic", + "tonic 0.13.1", ] [[package]] name = "databend-common-expression" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-data 56.2.0", "arrow-flight", - "arrow-ipc", - "arrow-ord", - "arrow-schema", - "arrow-select", + "arrow-ipc 56.2.0", + "arrow-ord 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", "async-backtrace", "base64 0.22.1", "borsh", @@ -3385,7 +3667,7 @@ dependencies = [ "serde_json", "strength_reduce", "terminal_size", - "tonic", + "tonic 0.13.1", "typetag", "unicode-segmentation", ] @@ -3519,7 +3801,7 @@ dependencies = [ "log", "serde", "thiserror 1.0.69", - "tonic", + "tonic 0.13.1", "tower-service", ] @@ -3658,7 +3940,7 @@ dependencies = [ "serde_json", "thiserror 1.0.69", "tokio", - "tonic", + "tonic 0.13.1", ] [[package]] @@ -3726,7 +4008,7 @@ dependencies = [ "log", "pretty_assertions", "sub-cache", - "tonic", + "tonic 0.13.1", ] [[package]] @@ -3763,7 +4045,7 @@ dependencies = [ "serde", "serde_json", "thiserror 1.0.69", - "tonic", + "tonic 0.13.1", ] [[package]] @@ -3905,7 +4187,7 @@ dependencies = [ "seq-marked", "thiserror 1.0.69", "tokio", - "tonic", + "tonic 0.13.1", ] [[package]] @@ -3958,7 +4240,7 @@ dependencies = [ "tempfile", "tokio", "tokio-stream", - "tonic", + "tonic 0.13.1", ] [[package]] @@ -3989,7 +4271,7 @@ dependencies = [ "state-machine-api", "tempfile", "thiserror 1.0.69", - "tonic", + "tonic 0.13.1", "tonic-build", ] @@ -4052,9 +4334,9 @@ dependencies = [ name = "databend-common-pipeline-transforms" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-ord", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-ord 56.2.0", + "arrow-schema 56.2.0", "async-backtrace", "async-channel", "async-trait", @@ -4117,7 +4399,7 @@ dependencies = [ "prost", "prost-build", "semver", - "tonic", + "tonic 0.13.1", "tonic-build", ] @@ -4232,7 +4514,7 @@ version = "0.1.0" dependencies = [ "ahash 0.8.12", "anyhow", - "arrow-schema", + "arrow-schema 56.2.0", "async-backtrace", "borsh", "chrono", @@ -4252,7 +4534,7 @@ dependencies = [ "log", "lru", "opendal", - "parquet", + "parquet 56.2.0", "prometheus-client 0.22.3", "regex", "reqwest", @@ -4266,7 +4548,7 @@ dependencies = [ name = "databend-common-storages-basic" version = "0.1.0" dependencies = [ - "arrow", + "arrow 56.2.0", "async-backtrace", "async-trait", "databend-common-base", @@ -4284,7 +4566,7 @@ dependencies = [ "databend-storages-common-table-meta", "opendal", "parking_lot 0.12.3", - "parquet", + "parquet 56.2.0", "serde", "serde_json", "sha2", @@ -4297,7 +4579,7 @@ dependencies = [ name = "databend-common-storages-delta" version = "0.1.0" dependencies = [ - "arrow-schema", + "arrow-schema 56.2.0", "async-backtrace", "async-trait", "databend-common-base", @@ -4313,8 +4595,9 @@ dependencies = [ "databend-storages-common-table-meta", "deltalake", "fastrace", + "itertools 0.13.0", "object_store_opendal", - "parquet", + "parquet 56.2.0", "serde", "serde_json", "tokio", @@ -4343,10 +4626,10 @@ name = "databend-common-storages-fuse" version = "0.1.0" dependencies = [ "ahash 0.8.12", - "arrow", - "arrow-array", - "arrow-ipc", - "arrow-schema", + "arrow 56.2.0", + "arrow-array 56.2.0", + "arrow-ipc 56.2.0", + "arrow-schema 56.2.0", "async-backtrace", "async-channel", "async-trait", @@ -4394,7 +4677,7 @@ dependencies = [ "match-template", "opendal", "parking_lot 0.12.3", - "parquet", + "parquet 56.2.0", "paste", "rand 0.8.5", "roaring", @@ -4440,7 +4723,7 @@ dependencies = [ "hive_metastore", "log", "opendal", - "parquet", + "parquet 56.2.0", "recursive", "serde", "typetag", @@ -4451,7 +4734,7 @@ dependencies = [ name = "databend-common-storages-iceberg" version = "0.1.0" dependencies = [ - "arrow-schema", + "arrow-schema 56.2.0", "async-backtrace", "async-trait", "chrono", @@ -4500,8 +4783,8 @@ dependencies = [ name = "databend-common-storages-orc" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "async-backtrace", "async-trait", "bytes", @@ -4532,10 +4815,10 @@ dependencies = [ name = "databend-common-storages-parquet" version = "0.1.0" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-schema 56.2.0", "async-backtrace", "async-trait", "bytes", @@ -4559,7 +4842,7 @@ dependencies = [ "jiff", "log", "opendal", - "parquet", + "parquet 56.2.0", "rand 0.8.5", "serde", "thrift", @@ -4571,7 +4854,7 @@ name = "databend-common-storages-stage" version = "0.1.0" dependencies = [ "apache-avro", - "arrow-schema", + "arrow-schema 56.2.0", "async-backtrace", "async-trait", "bstr", @@ -4604,7 +4887,7 @@ dependencies = [ "num-traits", "opendal", "parking_lot 0.12.3", - "parquet", + "parquet 56.2.0", "serde", "serde_json", "typetag", @@ -4688,8 +4971,8 @@ name = "databend-common-tracing" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", + "arrow-array 56.2.0", + "arrow-schema 56.2.0", "async-channel", "backtrace", "concurrent-queue", @@ -4708,11 +4991,11 @@ dependencies = [ "opentelemetry 0.29.1", "opentelemetry-otlp 0.29.0", "opentelemetry_sdk 0.29.0", - "parquet", + "parquet 56.2.0", "serde", "serde_json", "toml 0.8.22", - "tonic", + "tonic 0.13.1", ] [[package]] @@ -5105,7 +5388,7 @@ dependencies = [ "thiserror 1.0.69", "tokio", "tokio-stream", - "tonic", + "tonic 0.13.1", "tonic-reflection", "watcher", ] @@ -5153,15 +5436,15 @@ name = "databend-query" version = "0.1.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-csv 56.2.0", "arrow-flight", - "arrow-ipc", - "arrow-json", - "arrow-schema", - "arrow-select", + "arrow-ipc 56.2.0", + "arrow-json 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", "arrow-udf-runtime", "async-backtrace", "async-channel", @@ -5282,7 +5565,7 @@ dependencies = [ "opentelemetry_sdk 0.29.0", "p256", "parking_lot 0.12.3", - "parquet", + "parquet 56.2.0", "paste", "petgraph 0.6.5", "pin-project-lite", @@ -5315,7 +5598,7 @@ dependencies = [ "tokio-stream", "tokio-util", "toml 0.8.22", - "tonic", + "tonic 0.13.1", "tower 0.5.2", "typetag", "url", @@ -5391,14 +5674,14 @@ dependencies = [ "databend-common-expression", "databend-storages-common-table-meta", "parking_lot 0.12.3", - "parquet", + "parquet 56.2.0", ] [[package]] name = "databend-storages-common-cache" version = "0.1.0" dependencies = [ - "arrow", + "arrow 56.2.0", "async-backtrace", "async-trait", "bytes", @@ -5418,7 +5701,7 @@ dependencies = [ "log", "mockall", "parking_lot 0.12.3", - "parquet", + "parquet 56.2.0", "rayon", "rustix 0.38.44", "siphasher 0.3.11", @@ -5454,7 +5737,7 @@ dependencies = [ "num_cpus", "ordered-float 5.1.0", "parking_lot 0.12.3", - "parquet", + "parquet 56.2.0", "rand 0.8.5", "rayon", "roaring", @@ -5484,7 +5767,7 @@ dependencies = [ "futures", "log", "opendal", - "parquet", + "parquet 56.2.0", ] [[package]] @@ -5523,7 +5806,7 @@ dependencies = [ name = "databend-storages-common-stage" version = "0.1.0" dependencies = [ - "arrow-array", + "arrow-array 56.2.0", "databend-common-ast", "databend-common-catalog", "databend-common-exception", @@ -5539,7 +5822,7 @@ dependencies = [ name = "databend-storages-common-table-meta" version = "0.1.0" dependencies = [ - "arrow", + "arrow 56.2.0", "bincode 1.3.3", "bytes", "chrono", @@ -5555,7 +5838,7 @@ dependencies = [ "databend-common-storage", "enum-as-inner", "log", - "parquet", + "parquet 56.2.0", "rmp-serde", "serde", "serde_json", @@ -5641,7 +5924,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c96f51383ba327a1403e6e3458f8fc979d09d7200af56fa32681619f6c760dee" dependencies = [ - "arrow", + "arrow 55.1.0", "bytes", "chrono", "delta_kernel_derive", @@ -5649,7 +5932,7 @@ dependencies = [ "indexmap 2.9.0", "itertools 0.14.0", "object_store", - "parquet", + "parquet 55.1.0", "reqwest", "roaring", "rustc_version", @@ -5688,17 +5971,17 @@ name = "deltalake-core" version = "0.26.0" source = "git+https://github.com/delta-io/delta-rs?rev=9954bff#9954bff62fc46bfe63734eba7a78a27b90295755" dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", + "arrow 55.1.0", + "arrow-arith 55.1.0", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-cast 55.1.0", + "arrow-ipc 55.1.0", + "arrow-json 55.1.0", + "arrow-ord 55.1.0", + "arrow-row 55.1.0", + "arrow-schema 55.1.0", + "arrow-select 55.1.0", "async-trait", "bytes", "cfg-if", @@ -5717,7 +6000,7 @@ dependencies = [ "num_cpus", "object_store", "parking_lot 0.12.3", - "parquet", + "parquet 55.1.0", "percent-encoding", "pin-project-lite", "rand 0.8.5", @@ -8236,6 +8519,12 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + [[package]] name = "hashlink" version = "0.8.4" @@ -8769,19 +9058,19 @@ dependencies = [ [[package]] name = "iceberg" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=1dace26ea25a9b9e2066367cbd3b7badc75dd7f9#1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "anyhow", "apache-avro", "array-init", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-ord", - "arrow-schema", - "arrow-select", - "arrow-string", + "arrow-arith 56.2.0", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-ord 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "arrow-string 56.2.0", "async-trait", "bimap", "bytes", @@ -8796,7 +9085,7 @@ dependencies = [ "once_cell", "opendal", "ordered-float 4.6.0", - "parquet", + "parquet 56.2.0", "rand 0.8.5", "reqwest", "roaring", @@ -8818,7 +9107,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-glue" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=1dace26ea25a9b9e2066367cbd3b7badc75dd7f9#1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "anyhow", "async-trait", @@ -8835,7 +9124,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-hms" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=1dace26ea25a9b9e2066367cbd3b7badc75dd7f9#1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "anyhow", "async-trait", @@ -8859,7 +9148,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-rest" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=1dace26ea25a9b9e2066367cbd3b7badc75dd7f9#1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "async-trait", "chrono", @@ -8879,7 +9168,7 @@ dependencies = [ [[package]] name = "iceberg-catalog-s3tables" version = "0.4.0" -source = "git+https://github.com/databendlabs/iceberg-rust?rev=1dace26ea25a9b9e2066367cbd3b7badc75dd7f9#1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" +source = "git+https://github.com/databendlabs/iceberg-rust?rev=32b1403#32b1403eef8b00d7f2a526c551aa35b8fc31927e" dependencies = [ "anyhow", "async-trait", @@ -10165,6 +10454,12 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" +[[package]] +name = "matchit" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e1ffaa40ddd1f3ed91f717a33c8c0ee23fff369e3aa8772b9605cc1d22f4c3" + [[package]] name = "matrixmultiply" version = "0.3.10" @@ -11204,7 +11499,7 @@ dependencies = [ "serde_json", "thiserror 2.0.12", "tokio", - "tonic", + "tonic 0.12.3", "tracing", ] @@ -11224,7 +11519,7 @@ dependencies = [ "reqwest", "thiserror 2.0.12", "tokio", - "tonic", + "tonic 0.12.3", "tracing", ] @@ -11240,7 +11535,7 @@ dependencies = [ "opentelemetry_sdk 0.28.0", "prost", "serde", - "tonic", + "tonic 0.12.3", ] [[package]] @@ -11252,7 +11547,7 @@ dependencies = [ "opentelemetry 0.29.1", "opentelemetry_sdk 0.29.0", "prost", - "tonic", + "tonic 0.12.3", ] [[package]] @@ -11299,9 +11594,9 @@ dependencies = [ [[package]] name = "orc-rust" version = "0.6.0" -source = "git+https://github.com/datafuse-extras/orc-rust?rev=d82aa6d#d82aa6de06f49f8d3884d447041bf2d3c4daff4c" +source = "git+https://github.com/datafuse-extras/orc-rust?rev=fc812ad7010#fc812ad7010c5ab9753a0d93a31dbeed0bcbf3b7" dependencies = [ - "arrow", + "arrow 56.2.0", "async-trait", "bytemuck", "bytes", @@ -11471,13 +11766,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be7b2d778f6b841d37083ebdf32e33a524acde1266b5884a8ca29bf00dfa1231" dependencies = [ "ahash 0.8.12", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", + "arrow-array 55.1.0", + "arrow-buffer 55.1.0", + "arrow-cast 55.1.0", + "arrow-data 55.1.0", + "arrow-ipc 55.1.0", + "arrow-schema 55.1.0", + "arrow-select 55.1.0", "base64 0.22.1", "brotli 8.0.1", "bytes", @@ -11500,6 +11795,41 @@ dependencies = [ "zstd 0.13.3", ] +[[package]] +name = "parquet" +version = "56.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +dependencies = [ + "ahash 0.8.12", + "arrow-array 56.2.0", + "arrow-buffer 56.2.0", + "arrow-cast 56.2.0", + "arrow-data 56.2.0", + "arrow-ipc 56.2.0", + "arrow-schema 56.2.0", + "arrow-select 56.2.0", + "base64 0.22.1", + "brotli 8.0.1", + "bytes", + "chrono", + "flate2", + "futures", + "half", + "hashbrown 0.16.0", + "lz4_flex", + "num", + "num-bigint", + "paste", + "seq-macro", + "simdutf8", + "snap", + "thrift", + "tokio", + "twox-hash 2.1.0", + "zstd 0.13.3", +] + [[package]] name = "parse-display" version = "0.9.1" @@ -12470,11 +12800,10 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.24.2" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5203598f366b11a02b13aa20cab591229ff0a89fd121a308a5df751d5fc9219" +checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" dependencies = [ - "cfg-if", "indoc", "libc", "memoffset", @@ -12488,9 +12817,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.24.2" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99636d423fa2ca130fa5acde3059308006d46f98caac629418e53f7ebb1e9999" +checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" dependencies = [ "once_cell", "python3-dll-a", @@ -12499,9 +12828,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.24.2" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78f9cf92ba9c409279bc3305b5409d90db2d2c22392d443a87df3a1adad59e33" +checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" dependencies = [ "libc", "pyo3-build-config", @@ -12509,9 +12838,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.24.2" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b999cb1a6ce21f9a6b147dcf1be9ffedf02e0043aec74dc390f3007047cecd9" +checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -12521,9 +12850,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.24.2" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "822ece1c7e1012745607d5cf0bcb2874769f0f7cb34c4cde03b9358eb9ef911a" +checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" dependencies = [ "heck 0.5.0", "proc-macro2", @@ -13808,10 +14137,11 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" dependencies = [ + "serde_core", "serde_derive", ] @@ -13852,11 +14182,20 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -15747,7 +16086,7 @@ checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" dependencies = [ "async-stream", "async-trait", - "axum", + "axum 0.7.9", "base64 0.22.1", "bytes", "flate2", @@ -15761,13 +16100,41 @@ dependencies = [ "percent-encoding", "pin-project", "prost", + "socket2 0.5.9", + "tokio", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tonic" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" +dependencies = [ + "async-trait", + "axum 0.8.7", + "base64 0.22.1", + "bytes", + "h2 0.4.10", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-timeout", + "hyper-util", + "percent-encoding", + "pin-project", + "prost", "rustls-native-certs 0.8.1", - "rustls-pemfile 2.2.0", "socket2 0.5.9", "tokio", "tokio-rustls 0.26.2", "tokio-stream", - "tower 0.4.13", + "tower 0.5.2", "tower-layer", "tower-service", "tracing", @@ -15775,9 +16142,9 @@ dependencies = [ [[package]] name = "tonic-build" -version = "0.12.3" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" +checksum = "eac6f67be712d12f0b41328db3137e0d0757645d8904b4cb7d51cd9c2279e847" dependencies = [ "prettyplease", "proc-macro2", @@ -15789,15 +16156,15 @@ dependencies = [ [[package]] name = "tonic-reflection" -version = "0.12.3" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "878d81f52e7fcfd80026b7fdb6a9b578b3c3653ba987f87f0dce4b64043cba27" +checksum = "f9687bd5bfeafebdded2356950f278bba8226f0b32109537c4253406e09aafe1" dependencies = [ "prost", "prost-types", "tokio", "tokio-stream", - "tonic", + "tonic 0.13.1", ] [[package]] @@ -15828,11 +16195,15 @@ checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" dependencies = [ "futures-core", "futures-util", + "indexmap 2.9.0", "pin-project-lite", + "slab", "sync_wrapper", "tokio", + "tokio-util", "tower-layer", "tower-service", + "tracing", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 21bc051b29168..8dc5a50a915e0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -217,18 +217,18 @@ anyerror = { version = "=0.1.13" } anyhow = { version = "1.0.65" } apache-avro = { version = "0.17.0", features = ["snappy", "zstandard", "xz", "snappy", "bzip"] } approx = "0.5.1" -arrow = { version = "55" } -arrow-array = { version = "55" } -arrow-buffer = { version = "55" } -arrow-cast = { version = "55", features = ["prettyprint"] } -arrow-csv = { version = "55" } -arrow-data = { version = "55" } -arrow-flight = { version = "55", features = ["flight-sql-experimental", "tls"] } -arrow-ipc = { version = "55", features = ["lz4", "zstd"] } -arrow-json = { version = "55" } -arrow-ord = { version = "55" } -arrow-schema = { version = "55", features = ["serde"] } -arrow-select = { version = "55" } +arrow = { version = "56" } +arrow-array = { version = "56" } +arrow-buffer = { version = "56" } +arrow-cast = { version = "56", features = ["prettyprint"] } +arrow-csv = { version = "56" } +arrow-data = { version = "56" } +arrow-flight = { version = "56", features = ["flight-sql-experimental", "tls-ring"] } +arrow-ipc = { version = "56", features = ["lz4", "zstd"] } +arrow-json = { version = "56" } +arrow-ord = { version = "56" } +arrow-schema = { version = "56", features = ["serde"] } +arrow-select = { version = "56" } arrow-udf-runtime = { version = "0.8.0", default-features = false, features = ["javascript", "wasm"] } async-backtrace = "0.2" async-channel = "2.3.1" @@ -330,13 +330,13 @@ hyper-util = { version = "0.1.9", features = ["client", "client-legacy", "tokio" lru = "0.12" ## in branch dev -iceberg = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "1dace26ea25a9b9e2066367cbd3b7badc75dd7f9", features = [ +iceberg = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403", features = [ "storage-all", ] } -iceberg-catalog-glue = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" } -iceberg-catalog-hms = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" } -iceberg-catalog-rest = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" } -iceberg-catalog-s3tables = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "1dace26ea25a9b9e2066367cbd3b7badc75dd7f9" } +iceberg-catalog-glue = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403" } +iceberg-catalog-hms = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403" } +iceberg-catalog-rest = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403" } +iceberg-catalog-s3tables = { version = "0.4.0", git = "https://github.com/databendlabs/iceberg-rust", rev = "32b1403" } # Explicitly specify compatible AWS SDK versions aws-config = "1.5.18" @@ -415,7 +415,7 @@ ordered-float = { version = "5.1.0", default-features = false } ordq = "0.2.0" p256 = "0.13" parking_lot = "0.12.1" -parquet = { version = "55", features = ["async"] } +parquet = { version = "56", features = ["async"] } passwords = { version = "3.1.16" } paste = "1.0.15" percent-encoding = "2.3.1" @@ -518,9 +518,9 @@ tokio = { version = "1.35.0", features = ["full"] } tokio-stream = { version = "0.1.11", features = ["net"] } tokio-util = { version = "0.7.13" } toml = { version = "0.8", features = ["parse"] } -tonic = { version = "0.12.3", features = ["transport", "codegen", "prost", "tls-roots", "tls"] } -tonic-build = { version = "0.12.3" } -tonic-reflection = { version = "0.12.3" } +tonic = { version = "0.13", features = ["transport", "codegen", "tls-native-roots"] } +tonic-build = { version = "0.13" } +tonic-reflection = { version = "0.13" } tower = { version = "0.5.1", features = ["util"] } tower-service = "0.3.3" twox-hash = "1.6.3" @@ -643,7 +643,7 @@ overflow-checks = true rpath = true [patch.crates-io] -arrow-udf-runtime = { git = "https://github.com/datafuse-extras/arrow-udf.git", rev = "a442343" } +arrow-udf-runtime = { git = "https://github.com/datafuse-extras/arrow-udf.git", rev = "2480dccf1" } async-backtrace = { git = "https://github.com/datafuse-extras/async-backtrace.git", rev = "dea4553" } async-recursion = { git = "https://github.com/datafuse-extras/async-recursion.git", rev = "a353334" } backtrace = { git = "https://github.com/rust-lang/backtrace-rs.git", rev = "72265be" } @@ -651,7 +651,7 @@ color-eyre = { git = "https://github.com/eyre-rs/eyre.git", rev = "e5d92c3" } deltalake = { git = "https://github.com/delta-io/delta-rs", rev = "9954bff" } map-api = { git = "https://github.com/databendlabs/map-api", tag = "v0.4.2" } openraft = { git = "https://github.com/databendlabs/openraft", tag = "v0.10.0-alpha.11" } -orc-rust = { git = "https://github.com/datafuse-extras/orc-rust", rev = "d82aa6d" } +orc-rust = { git = "https://github.com/datafuse-extras/orc-rust", rev = "fc812ad7010" } recursive = { git = "https://github.com/datafuse-extras/recursive.git", rev = "16e433a" } sled = { git = "https://github.com/datafuse-extras/sled", tag = "v0.34.7-datafuse.1" } state-machine-api = { git = "https://github.com/databendlabs/state-machine-api.git", tag = "v0.3.4" } diff --git a/src/bendpy/Cargo.toml b/src/bendpy/Cargo.toml index ac7943fda7e7c..779b7e897f2e7 100644 --- a/src/bendpy/Cargo.toml +++ b/src/bendpy/Cargo.toml @@ -7,7 +7,7 @@ publish = { workspace = true } edition = { workspace = true } [build-dependencies] -pyo3-build-config = "0.24.2" +pyo3-build-config = "0.25" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] @@ -32,7 +32,7 @@ databend-query = { workspace = true, features = [ "simd", "disable_initial_exec_tls", ] } -pyo3 = { version = "0.24", features = ["generate-import-lib", "abi3-py312"] } +pyo3 = { version = "0.25", features = ["generate-import-lib", "abi3-py312"] } serde_json = { workspace = true } sysinfo = { workspace = true } tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync"] } diff --git a/src/common/cloud_control/Cargo.toml b/src/common/cloud_control/Cargo.toml index a54867e1e981d..76db8de4f8ef3 100644 --- a/src/common/cloud_control/Cargo.toml +++ b/src/common/cloud_control/Cargo.toml @@ -16,6 +16,7 @@ hyper-util = { workspace = true } prost = { workspace = true } serde = { workspace = true } tonic = { workspace = true } +#tonic-prost = { workspace = true } [build-dependencies] lenient_semver = { workspace = true } diff --git a/src/query/service/src/servers/flight/flight_service.rs b/src/query/service/src/servers/flight/flight_service.rs index 4e05a128e7ccc..c1072e84ab032 100644 --- a/src/query/service/src/servers/flight/flight_service.rs +++ b/src/query/service/src/servers/flight/flight_service.rs @@ -79,8 +79,10 @@ impl FlightService { builder }; - let incoming = TcpIncoming::new(addr, true, None) - .map_err(|e| ErrorCode::CannotListenerPort(format!("{},{}", e, addr)))?; + let incoming = TcpIncoming::bind(addr) + .map_err(|e| ErrorCode::CannotListenerPort(format!("{},{}", e, addr)))? + .with_nodelay(Some(true)) + .with_keepalive(None); let server = builder .add_service( FlightServiceServer::new(flight_api_service) diff --git a/src/query/service/src/servers/flight_sql/flight_sql_server.rs b/src/query/service/src/servers/flight_sql/flight_sql_server.rs index a8df708556387..1872935508673 100644 --- a/src/query/service/src/servers/flight_sql/flight_sql_server.rs +++ b/src/query/service/src/servers/flight_sql/flight_sql_server.rs @@ -86,8 +86,10 @@ impl FlightSQLServer { builder }; - let incoming = TcpIncoming::new(addr, true, None) - .map_err(|e| ErrorCode::CannotListenerPort(format!("{},{}", e, addr)))?; + let incoming = TcpIncoming::bind(addr) + .map_err(|e| ErrorCode::CannotListenerPort(format!("{},{}", e, addr)))? + .with_nodelay(Some(true)) + .with_keepalive(None); let server = builder .add_service(FlightServiceServer::new(flight_sql_service)) diff --git a/src/query/service/tests/it/parquet_rs/prune_row_groups.rs b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs index ae66b7c2c4941..c420fc67abd53 100644 --- a/src/query/service/tests/it/parquet_rs/prune_row_groups.rs +++ b/src/query/service/tests/it/parquet_rs/prune_row_groups.rs @@ -19,6 +19,7 @@ use databend_common_catalog::plan::ParquetReadOptions; use databend_common_expression::FunctionContext; use databend_common_expression::TableSchema; use databend_common_storages_parquet::ParquetPruner; +use parquet::file::metadata::ParquetMetaDataReader; use super::data::make_test_file_rg; use super::data::Scenario; @@ -46,8 +47,9 @@ async fn test_impl_batch(args: &[(Scenario, &str, Vec)], prune: bool) { let plan = get_data_source_plan(fixture.new_query_ctx().await.unwrap(), &sql) .await .unwrap(); - #[allow(deprecated)] - let parquet_meta = parquet::file::footer::parse_metadata(file.as_file()).unwrap(); + let parquet_meta = ParquetMetaDataReader::new() + .parse_and_finish(file.as_file()) + .unwrap(); let schema = TableSchema::try_from(arrow_schema.as_ref()).unwrap(); let leaf_fields = Arc::new(schema.leaf_fields()); diff --git a/src/query/storages/delta/Cargo.toml b/src/query/storages/delta/Cargo.toml index fef5765c56142..92b52eec41801 100644 --- a/src/query/storages/delta/Cargo.toml +++ b/src/query/storages/delta/Cargo.toml @@ -24,6 +24,7 @@ async-backtrace = { workspace = true } async-trait = { workspace = true } deltalake = { workspace = true } fastrace = { workspace = true } +itertools = { workspace = true } object_store_opendal = { workspace = true } parquet = { workspace = true } serde = { workspace = true } diff --git a/src/query/storages/delta/src/arrow56_conversion.rs b/src/query/storages/delta/src/arrow56_conversion.rs new file mode 100644 index 0000000000000..321aa1cdf861e --- /dev/null +++ b/src/query/storages/delta/src/arrow56_conversion.rs @@ -0,0 +1,321 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Conversions from Delta kernel types to Arrow v56 types. +//! Adapted from https://github.com/delta-io/delta-kernel-rs/blob/v0.10.0/kernel/src/engine/arrow_conversion.rs. +use std::fmt; +use std::sync::Arc; + +use arrow_schema::DataType as ArrowDataType; +use arrow_schema::Field as ArrowField; +use arrow_schema::Schema as ArrowSchema; +use arrow_schema::SchemaRef as ArrowSchemaRef; +use arrow_schema::TimeUnit; +use deltalake::kernel::error::Error; +use deltalake::kernel::ArrayType; +use deltalake::kernel::DataType; +use deltalake::kernel::MapType; +use deltalake::kernel::MetadataValue; +use deltalake::kernel::PrimitiveType; +use deltalake::kernel::StructField; +use deltalake::kernel::StructType; +use itertools::Itertools; + +pub(crate) const LIST_ARRAY_ROOT: &str = "element"; +pub(crate) const MAP_ROOT_DEFAULT: &str = "key_value"; +pub(crate) const MAP_KEY_DEFAULT: &str = "key"; +pub(crate) const MAP_VALUE_DEFAULT: &str = "value"; + +#[derive(Debug)] +pub(crate) enum Arrow56ConversionError { + Arrow(deltalake::arrow::error::ArrowError), + DeltaKernel(Error), + SerdeJson(serde_json::Error), + InvalidDataType(String), +} + +impl fmt::Display for Arrow56ConversionError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Arrow56ConversionError::Arrow(err) => write!(f, "{err}"), + Arrow56ConversionError::DeltaKernel(err) => write!(f, "{err}"), + Arrow56ConversionError::SerdeJson(err) => write!(f, "{err}"), + Arrow56ConversionError::InvalidDataType(msg) => write!(f, "{msg}"), + } + } +} + +impl std::error::Error for Arrow56ConversionError {} + +impl From for Arrow56ConversionError { + fn from(value: deltalake::arrow::error::ArrowError) -> Self { + Self::Arrow(value) + } +} + +impl From for Arrow56ConversionError { + fn from(value: serde_json::Error) -> Self { + Self::SerdeJson(value) + } +} + +impl From for Arrow56ConversionError { + fn from(value: Error) -> Self { + Self::DeltaKernel(value) + } +} + +pub(crate) type Result = std::result::Result; + +pub(crate) trait TryFromValue: Sized { + fn try_from_value(value: T) -> Result; +} + +pub(crate) trait TryIntoValue: Sized { + fn try_into_value(self) -> Result; +} + +impl TryIntoValue for T +where U: TryFromValue +{ + #[inline] + fn try_into_value(self) -> Result { + U::try_from_value(self) + } +} + +impl TryFromValue<&StructType> for ArrowSchema { + fn try_from_value(s: &StructType) -> Result { + let fields: Vec = s.fields().map(TryIntoValue::try_into_value).try_collect()?; + Ok(ArrowSchema::new(fields)) + } +} + +impl TryFromValue<&StructField> for ArrowField { + fn try_from_value(f: &StructField) -> Result { + let metadata = f + .metadata() + .iter() + .map(|(key, val)| match &val { + &MetadataValue::String(val) => Ok((key.clone(), val.clone())), + _ => Ok((key.clone(), serde_json::to_string(val)?)), + }) + .collect::>()?; + + let field = ArrowField::new( + f.name(), + ArrowDataType::try_from_value(f.data_type())?, + f.is_nullable(), + ) + .with_metadata(metadata); + + Ok(field) + } +} + +impl TryFromValue<&ArrayType> for ArrowField { + fn try_from_value(a: &ArrayType) -> Result { + Ok(ArrowField::new( + LIST_ARRAY_ROOT, + ArrowDataType::try_from_value(a.element_type())?, + a.contains_null(), + )) + } +} + +impl TryFromValue<&MapType> for ArrowField { + fn try_from_value(a: &MapType) -> Result { + Ok(ArrowField::new( + MAP_ROOT_DEFAULT, + ArrowDataType::Struct( + vec![ + ArrowField::new( + MAP_KEY_DEFAULT, + ArrowDataType::try_from_value(a.key_type())?, + false, + ), + ArrowField::new( + MAP_VALUE_DEFAULT, + ArrowDataType::try_from_value(a.value_type())?, + a.value_contains_null(), + ), + ] + .into(), + ), + false, // always non-null + )) + } +} + +impl TryFromValue<&DataType> for ArrowDataType { + fn try_from_value(t: &DataType) -> Result { + match t { + DataType::Primitive(p) => { + match p { + PrimitiveType::String => Ok(ArrowDataType::Utf8), + PrimitiveType::Long => Ok(ArrowDataType::Int64), // undocumented type + PrimitiveType::Integer => Ok(ArrowDataType::Int32), + PrimitiveType::Short => Ok(ArrowDataType::Int16), + PrimitiveType::Byte => Ok(ArrowDataType::Int8), + PrimitiveType::Float => Ok(ArrowDataType::Float32), + PrimitiveType::Double => Ok(ArrowDataType::Float64), + PrimitiveType::Boolean => Ok(ArrowDataType::Boolean), + PrimitiveType::Binary => Ok(ArrowDataType::Binary), + PrimitiveType::Decimal(dtype) => Ok(ArrowDataType::Decimal128( + dtype.precision(), + dtype.scale() as i8, // 0..=38 + )), + PrimitiveType::Date => { + // A calendar date, represented as a year-month-day triple without a + // timezone. Stored as 4 bytes integer representing days since 1970-01-01 + Ok(ArrowDataType::Date32) + } + // TODO: https://github.com/delta-io/delta/issues/643 + PrimitiveType::Timestamp => Ok(ArrowDataType::Timestamp( + TimeUnit::Microsecond, + Some("UTC".into()), + )), + PrimitiveType::TimestampNtz => { + Ok(ArrowDataType::Timestamp(TimeUnit::Microsecond, None)) + } + } + } + DataType::Struct(s) => Ok(ArrowDataType::Struct( + s.fields() + .map(TryIntoValue::try_into_value) + .collect::>>()? + .into(), + )), + DataType::Array(a) => Ok(ArrowDataType::List(Arc::new(a.as_ref().try_into_value()?))), + DataType::Map(m) => Ok(ArrowDataType::Map( + Arc::new(m.as_ref().try_into_value()?), + false, + )), + } + } +} + +impl TryFromValue<&ArrowSchema> for StructType { + fn try_from_value(arrow_schema: &ArrowSchema) -> Result { + StructType::try_new( + arrow_schema + .fields() + .iter() + .map(|field| field.as_ref().try_into_value()), + ) + } +} + +impl TryFromValue for StructType { + fn try_from_value(arrow_schema: ArrowSchemaRef) -> Result { + arrow_schema.as_ref().try_into_value() + } +} + +impl TryFromValue<&ArrowField> for StructField { + fn try_from_value(arrow_field: &ArrowField) -> Result { + Ok(StructField::new( + arrow_field.name().clone(), + DataType::try_from_value(arrow_field.data_type())?, + arrow_field.is_nullable(), + ) + .with_metadata(arrow_field.metadata().iter().map(|(k, v)| (k.clone(), v)))) + } +} + +impl TryFromValue<&ArrowDataType> for DataType { + fn try_from_value(arrow_datatype: &ArrowDataType) -> Result { + match arrow_datatype { + ArrowDataType::Utf8 => Ok(DataType::STRING), + ArrowDataType::LargeUtf8 => Ok(DataType::STRING), + ArrowDataType::Utf8View => Ok(DataType::STRING), + ArrowDataType::Int64 => Ok(DataType::LONG), // undocumented type + ArrowDataType::Int32 => Ok(DataType::INTEGER), + ArrowDataType::Int16 => Ok(DataType::SHORT), + ArrowDataType::Int8 => Ok(DataType::BYTE), + ArrowDataType::UInt64 => Ok(DataType::LONG), // undocumented type + ArrowDataType::UInt32 => Ok(DataType::INTEGER), + ArrowDataType::UInt16 => Ok(DataType::SHORT), + ArrowDataType::UInt8 => Ok(DataType::BYTE), + ArrowDataType::Float32 => Ok(DataType::FLOAT), + ArrowDataType::Float64 => Ok(DataType::DOUBLE), + ArrowDataType::Boolean => Ok(DataType::BOOLEAN), + ArrowDataType::Binary => Ok(DataType::BINARY), + ArrowDataType::FixedSizeBinary(_) => Ok(DataType::BINARY), + ArrowDataType::LargeBinary => Ok(DataType::BINARY), + ArrowDataType::BinaryView => Ok(DataType::BINARY), + ArrowDataType::Decimal128(p, s) => { + if *s < 0 { + return Err(Arrow56ConversionError::from(Error::Generic( + "Negative scales are not supported in Delta".to_owned(), + ))); + }; + DataType::decimal(*p, *s as u8) + .map_err(|e| Arrow56ConversionError::InvalidDataType(e.to_string())) + } + ArrowDataType::Date32 => Ok(DataType::DATE), + ArrowDataType::Date64 => Ok(DataType::DATE), + ArrowDataType::Timestamp(TimeUnit::Microsecond, None) => Ok(DataType::TIMESTAMP_NTZ), + ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(tz)) + if tz.eq_ignore_ascii_case("utc") => + { + Ok(DataType::TIMESTAMP) + } + ArrowDataType::Struct(fields) => DataType::try_struct_type( + fields.iter().map(|field| field.as_ref().try_into_value()), + ), + ArrowDataType::List(field) => Ok(ArrayType::new( + (*field).data_type().try_into_value()?, + (*field).is_nullable(), + ) + .into()), + ArrowDataType::ListView(field) => Ok(ArrayType::new( + (*field).data_type().try_into_value()?, + (*field).is_nullable(), + ) + .into()), + ArrowDataType::LargeList(field) => Ok(ArrayType::new( + (*field).data_type().try_into_value()?, + (*field).is_nullable(), + ) + .into()), + ArrowDataType::LargeListView(field) => Ok(ArrayType::new( + (*field).data_type().try_into_value()?, + (*field).is_nullable(), + ) + .into()), + ArrowDataType::FixedSizeList(field, _) => Ok(ArrayType::new( + (*field).data_type().try_into_value()?, + (*field).is_nullable(), + ) + .into()), + ArrowDataType::Map(field, _) => { + if let ArrowDataType::Struct(struct_fields) = field.data_type() { + let key_type = DataType::try_from_value(struct_fields[0].data_type())?; + let value_type = DataType::try_from_value(struct_fields[1].data_type())?; + let value_type_nullable = struct_fields[1].is_nullable(); + Ok(MapType::new(key_type, value_type, value_type_nullable).into()) + } else { + panic!("DataType::Map should contain a struct field child"); + } + } + // Dictionary types are just an optimized in-memory representation of an array. + // Schema-wise, they are the same as the value type. + ArrowDataType::Dictionary(_, value_type) => Ok(value_type.as_ref().try_into_value()?), + s => Err(Arrow56ConversionError::InvalidDataType(format!( + "Invalid data type for Delta Lake: {s}" + ))), + } + } +} diff --git a/src/query/storages/delta/src/lib.rs b/src/query/storages/delta/src/lib.rs index dffe44433b0e2..87f63f01a8731 100644 --- a/src/query/storages/delta/src/lib.rs +++ b/src/query/storages/delta/src/lib.rs @@ -19,4 +19,6 @@ mod partition; mod table; mod table_source; +mod arrow56_conversion; + pub use table::DeltaTable; diff --git a/src/query/storages/delta/src/table.rs b/src/query/storages/delta/src/table.rs index 5d4b6553b8cb0..af8f0421422ae 100644 --- a/src/query/storages/delta/src/table.rs +++ b/src/query/storages/delta/src/table.rs @@ -58,6 +58,7 @@ use serde::Serialize; use tokio::sync::OnceCell; use url::Url; +use super::arrow56_conversion::TryIntoValue; use crate::partition::DeltaPartInfo; use crate::table_source::DeltaTableSource; @@ -140,7 +141,7 @@ impl DeltaTable { })?; // Build arrow schema from delta metadata. - let arrow_schema: ArrowSchema = delta_meta.try_into().map_err(|e| { + let arrow_schema: ArrowSchema = delta_meta.try_into_value().map_err(|e| { ErrorCode::ReadTableDataError(format!("Cannot convert table metadata: {e:?}")) })?; diff --git a/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs b/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs index 0d6abe475b4f1..2b9ad8719e2e2 100644 --- a/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs +++ b/src/query/storages/parquet/src/parquet_reader/reader/full_reader.rs @@ -37,11 +37,11 @@ use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::arrow::arrow_reader::RowFilter; use parquet::arrow::async_reader::AsyncFileReader; -use parquet::arrow::async_reader::MetadataLoader; use parquet::arrow::async_reader::ParquetRecordBatchStream; use parquet::arrow::ParquetRecordBatchStreamBuilder; use parquet::arrow::ProjectionMask; use parquet::file::metadata::ParquetMetaData; +use parquet::file::metadata::ParquetMetaDataReader; use parquet::schema::types::SchemaDescPtr; use crate::meta::check_parquet_schema; @@ -297,12 +297,11 @@ impl AsyncFileReader for ParquetFileReader { _options: Option<&'_ ArrowReaderOptions>, ) -> BoxFuture<'_, parquet::errors::Result>> { Box::pin(async move { - let size = self.size as usize; - #[allow(deprecated)] - let mut loader = MetadataLoader::load(self, size, None).await?; - #[allow(deprecated)] - loader.load_page_index(false, false).await?; - Ok(Arc::new(loader.finish())) + let size = self.size; + let meta_data = ParquetMetaDataReader::new() + .load_and_finish(self, size) + .await?; + Ok(Arc::new(meta_data)) }) } } diff --git a/src/query/storages/parquet/src/statistics/column.rs b/src/query/storages/parquet/src/statistics/column.rs index 73c3dc6d65cdc..b39a56f6844fd 100644 --- a/src/query/storages/parquet/src/statistics/column.rs +++ b/src/query/storages/parquet/src/statistics/column.rs @@ -27,11 +27,30 @@ use super::utils::decode_decimal256_from_bytes; /// according to https://github.com/apache/parquet-format/blob/master/LogicalTypes.md pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option { - let (max, min) = if s.has_min_max_set() { + if s.is_min_max_deprecated() { + return None; + } + let has_min_max_set = { + match s { + Statistics::Boolean(s) => s.max_opt().is_some() && s.min_opt().is_some(), + Statistics::Int32(s) => s.max_opt().is_some() && s.min_opt().is_some(), + Statistics::Int64(s) => s.max_opt().is_some() && s.min_opt().is_some(), + Statistics::Int96(s) => s.max_opt().is_some() && s.min_opt().is_some(), + Statistics::Float(s) => s.max_opt().is_some() && s.min_opt().is_some(), + Statistics::Double(s) => s.max_opt().is_some() && s.min_opt().is_some(), + Statistics::ByteArray(s) => s.max_opt().is_some() && s.min_opt().is_some(), + Statistics::FixedLenByteArray(s) => s.max_opt().is_some() && s.min_opt().is_some(), + } + }; + + let (max, min) = if has_min_max_set { match s { - Statistics::Boolean(s) => (Scalar::Boolean(*s.max()), Scalar::Boolean(*s.min())), + Statistics::Boolean(s) => ( + Scalar::Boolean(*s.max_opt().unwrap()), + Scalar::Boolean(*s.min_opt().unwrap()), + ), Statistics::Int32(s) => { - let (max, min) = (*s.max(), *s.min()); + let (max, min) = (*s.max_opt().unwrap(), *s.min_opt().unwrap()); match typ { TableDataType::Number(NumberDataType::Int8) => { (Scalar::from(max as i8), Scalar::from(min as i8)) @@ -67,7 +86,7 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option< } } Statistics::Int64(s) => { - let (max, min) = (*s.max(), *s.min()); + let (max, min) = (*s.max_opt().unwrap(), *s.min_opt().unwrap()); match typ { TableDataType::Number(NumberDataType::UInt64) => { (Scalar::from(max as u64), Scalar::from(min as u64)) @@ -98,25 +117,26 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option< } } Statistics::Int96(s) => { - let (max, min) = (s.max().to_i64(), s.min().to_i64()); - let multi = match max.checked_ilog10().unwrap_or_default() + 1 { - 0..=10 => 1_000_000, - 11..=13 => 1_000, - _ => 1, - }; - ( - Scalar::Timestamp(max * multi), - Scalar::Timestamp(min * multi), - ) + let (max, min) = ( + s.max_opt().unwrap().to_micros(), + s.min_opt().unwrap().to_micros(), + ); + (Scalar::Timestamp(max), Scalar::Timestamp(min)) } - Statistics::Float(s) => (Scalar::from(*s.max()), Scalar::from(*s.min())), - Statistics::Double(s) => (Scalar::from(*s.max()), Scalar::from(*s.min())), + Statistics::Float(s) => ( + Scalar::from(*s.max_opt().unwrap()), + Scalar::from(*s.min_opt().unwrap()), + ), + Statistics::Double(s) => ( + Scalar::from(*s.max_opt().unwrap()), + Scalar::from(*s.min_opt().unwrap()), + ), Statistics::ByteArray(s) => ( - Scalar::String(String::from_utf8(s.max().as_bytes().to_vec()).ok()?), - Scalar::String(String::from_utf8(s.min().as_bytes().to_vec()).ok()?), + Scalar::String(String::from_utf8(s.max_opt().unwrap().as_bytes().to_vec()).ok()?), + Scalar::String(String::from_utf8(s.min_opt().unwrap().as_bytes().to_vec()).ok()?), ), Statistics::FixedLenByteArray(s) => { - let (max, min) = (s.max(), s.min()); + let (max, min) = (s.max_opt().unwrap(), s.min_opt().unwrap()); match typ { TableDataType::Decimal(DecimalDataType::Decimal128(size)) => ( decode_decimal128_from_bytes(max, *size), @@ -136,8 +156,9 @@ pub fn convert_column_statistics(s: &Statistics, typ: &TableDataType) -> Option< Some(ColumnStatistics::new( min, max, - s.null_count(), + // Doc this + s.null_count_opt().unwrap_or(0), 0, // this field is not used. - s.distinct_count(), + s.distinct_count_opt(), )) } diff --git a/src/query/storages/parquet/src/statistics/page.rs b/src/query/storages/parquet/src/statistics/page.rs index 601069c2dae34..69e2b6e8b620b 100644 --- a/src/query/storages/parquet/src/statistics/page.rs +++ b/src/query/storages/parquet/src/statistics/page.rs @@ -184,8 +184,8 @@ fn convert_page_index_int96( ) -> Option { match (&index.min, &index.max, index.null_count) { (Some(min), Some(max), Some(null_count)) => Some(ColumnStatistics::new( - Scalar::Timestamp(min.to_i64()), - Scalar::Timestamp(max.to_i64()), + Scalar::Timestamp(min.to_micros()), + Scalar::Timestamp(max.to_micros()), null_count as u64, 0, None, From 7b485ae4ed9ceebcf59d7e2fc84a58a56aabef39 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 20 Nov 2025 18:21:58 +0800 Subject: [PATCH 2/5] fix: tweak logic tests Meta data that marks the parquet writer version has been changed to 'parquet-rs version 56.2.0', thus some cases that expect exact parquet file size need to be tweaked. --- .../suites/ee/01_ee_system/01_0002_virtual_column.test | 2 +- .../index/08_ngram_index/08_0000_ngram_index_base.test | 2 +- .../formats/parquet/options/parquet_missing_uuid.test | 2 +- tests/sqllogictests/suites/stage/unload.test | 2 +- .../00_stage/00_0005_copy_into_location.result | 2 +- .../00_stage/00_0019_sequence_as_default.result | 4 ++-- .../01_0006_streaming_load_parquet.result | 10 +++++----- .../01_0007_streaming_load_placeholder.result | 2 +- 8 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test index 35d8441b0456b..e98bb103000a0 100644 --- a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test +++ b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test @@ -140,7 +140,7 @@ test_virtual_column t2 val 3000000002 ['c'] String query II select row_count, virtual_column_size from fuse_block('test_virtual_column', 't2') ---- -3 806 +3 833 query III select block_count, row_count, virtual_column_size from fuse_segment('test_virtual_column', 't2'); diff --git a/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test b/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test index c2fd6f5688d71..07c40a3c0f372 100644 --- a/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test +++ b/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test @@ -20,7 +20,7 @@ INSERT INTO t1 VALUES query TII select name, index_size, bloom_index_size, ngram_index_size from system.tables where name='t1' and database='test_gram_index'; ---- -t1 1049482 1049482 1048617 +t1 1049509 1049509 1048617 query III select row_count, bloom_filter_size, ngram_index_size from fuse_block('test_gram_index', 't1') diff --git a/tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_uuid.test b/tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_uuid.test index 836686dadd8f9..49f52982befba 100644 --- a/tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_uuid.test +++ b/tests/sqllogictests/suites/stage/formats/parquet/options/parquet_missing_uuid.test @@ -25,7 +25,7 @@ select * from t_uuid query copy into @data/parquet/unload/uuid/ from (select 1 as a) file_format = (type = parquet) ---- -1 1 408 +1 1 414 statement ok truncate table t_uuid diff --git a/tests/sqllogictests/suites/stage/unload.test b/tests/sqllogictests/suites/stage/unload.test index 7a09dfe8e0407..b4c5918566091 100644 --- a/tests/sqllogictests/suites/stage/unload.test +++ b/tests/sqllogictests/suites/stage/unload.test @@ -145,7 +145,7 @@ copy into @unload/a_raw_path.csv from (select 3,4) file_format=(type=csv) single query copy into @unload/array_of_nulls from (select [NULL, NULL]); ---- -1 8 548 +1 8 555 query select * from @unload/array_of_nulls; diff --git a/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.result b/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.result index 1c6cfc031d181..75b2cf047e56e 100755 --- a/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.result +++ b/tests/suites/1_stateful/00_stage/00_0005_copy_into_location.result @@ -1,4 +1,4 @@ >>>> create or replace connection c_00_0005 storage_type='s3' access_key_id = 'minioadmin' endpoint_url = 'http://127.0.0.1:9900' secret_access_key = 'minioadmin' >>>> copy into 's3://testbucket/c_00_0005/ab de/f' connection=(connection_name='c_00_0005') from (select 1) detailed_output=true use_raw_path=true single=true overwrite=true -c_00_0005/ab de/f 408 1 +c_00_0005/ab de/f 414 1 <<<< diff --git a/tests/suites/1_stateful/00_stage/00_0019_sequence_as_default.result b/tests/suites/1_stateful/00_stage/00_0019_sequence_as_default.result index e915d9dfd3038..441dd73d53b28 100755 --- a/tests/suites/1_stateful/00_stage/00_0019_sequence_as_default.result +++ b/tests/suites/1_stateful/00_stage/00_0019_sequence_as_default.result @@ -42,9 +42,9 @@ >>>> create or replace sequence seq >>>> create or replace table dest(seq int default nextval(seq), a int) >>>> copy INTO @sequence_as_default/src1/ from src1 file_format=(type=parquet); -2 9 430 +2 9 434 >>>> copy INTO @sequence_as_default/src2/ from src2 file_format=(type=parquet); -2 18 660 +2 18 671 >>>> copy INTO dest(a) from @sequence_as_default/src1 file_format=(type=parquet) return_failed_only=true; >>>> copy INTO dest from @sequence_as_default/src2 file_format=(type=parquet) return_failed_only=true; >>>> select * from dest order by seq diff --git a/tests/suites/1_stateful/01_streaming_load/01_0006_streaming_load_parquet.result b/tests/suites/1_stateful/01_streaming_load/01_0006_streaming_load_parquet.result index af42ad6f64917..ec8524237e971 100755 --- a/tests/suites/1_stateful/01_streaming_load/01_0006_streaming_load_parquet.result +++ b/tests/suites/1_stateful/01_streaming_load/01_0006_streaming_load_parquet.result @@ -2,7 +2,7 @@ >>>> CREATE or replace TABLE streaming_load_parquet (c1 string default 'ok', c2 int, c3 date); --'2021-01-01' as c3, '1' as c2 >>>> copy into @streaming_load_parquet/q1.parquet from (select '2021-01-01' as c3, '1' as c2) single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true; -q1.parquet 624 1 +q1.parquet 637 1 >>>> streaming load: q1.parquet error : + curl -sS -H x-databend-query-id:load-q1 -H 'X-Databend-SQL:insert into streaming_load_parquet(c2,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=error, null_if=())' -F upload=@/tmp/streaming_load_parquet/q1.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load {"id":"load-q1","stats":{"rows":1,"bytes":27}} @@ -13,7 +13,7 @@ ok 1 2021-01-01 >>>> truncate table streaming_load_parquet --'2021-01-01' as c3 >>>> copy into @streaming_load_parquet/q2.parquet from (select '2021-01-01' as c3) single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true; -q2.parquet 426 1 +q2.parquet 431 1 >>>> streaming load: q2.parquet error : + curl -sS -H x-databend-query-id:load-q2 -H 'X-Databend-SQL:insert into streaming_load_parquet(c2,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=error, null_if=())' -F upload=@/tmp/streaming_load_parquet/q2.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load {"error":{"code":400,"message":"[HTTP-STREAMING-LOAD] Query execution failed: file q2.parquet missing column `c2`"}} @@ -23,7 +23,7 @@ q2.parquet 426 1 >>>> truncate table streaming_load_parquet --'2021-01-01' as c3 >>>> copy into @streaming_load_parquet/q3.parquet from (select '2021-01-01' as c3) single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true; -q3.parquet 426 1 +q3.parquet 431 1 >>>> streaming load: q3.parquet field_default : + curl -sS -H x-databend-query-id:load-q3 -H 'X-Databend-SQL:insert into streaming_load_parquet(c2,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=field_default, null_if=())' -F upload=@/tmp/streaming_load_parquet/q3.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load {"id":"load-q3","stats":{"rows":1,"bytes":23}} @@ -34,7 +34,7 @@ ok NULL 2021-01-01 >>>> truncate table streaming_load_parquet --'2021-01-01' as c3, 'my_null' as c1 >>>> copy into @streaming_load_parquet/q4.parquet from (select '2021-01-01' as c3, 'my_null' as c1) single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true; -q4.parquet 643 1 +q4.parquet 655 1 >>>> streaming load: q4.parquet error : + curl -sS -H x-databend-query-id:load-q4 -H 'X-Databend-SQL:insert into streaming_load_parquet(c1,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=error, null_if=())' -F upload=@/tmp/streaming_load_parquet/q4.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load {"id":"load-q4","stats":{"rows":1,"bytes":34}} @@ -45,7 +45,7 @@ my_null NULL 2021-01-01 >>>> truncate table streaming_load_parquet --'2021-01-01' as c3, 'my_null' as c1 >>>> copy into @streaming_load_parquet/q5.parquet from (select '2021-01-01' as c3, 'my_null' as c1) single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true; -q5.parquet 643 1 +q5.parquet 655 1 >>>> streaming load: q5.parquet error 'my_null': + curl -sS -H x-databend-query-id:load-q5 -H 'X-Databend-SQL:insert into streaming_load_parquet(c1,c3) from @_databend_load file_format = (type='\''parquet'\'', missing_field_as=error, null_if=('\''my_null'\''))' -F upload=@/tmp/streaming_load_parquet/q5.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load {"id":"load-q5","stats":{"rows":1,"bytes":7}} diff --git a/tests/suites/1_stateful/01_streaming_load/01_0007_streaming_load_placeholder.result b/tests/suites/1_stateful/01_streaming_load/01_0007_streaming_load_placeholder.result index 79877f10ded19..f042a1dff4807 100755 --- a/tests/suites/1_stateful/01_streaming_load/01_0007_streaming_load_placeholder.result +++ b/tests/suites/1_stateful/01_streaming_load/01_0007_streaming_load_placeholder.result @@ -32,7 +32,7 @@ ok 110 a 2020-01-02 >>>> truncate table streaming_load_07 --parquet >>>> copy into @streaming_load_07/data.parquet from (select '2020-01-02' as c4, 110 as c2) file_format=(type='parquet') single=true include_query_id=false use_raw_path=true detailed_output=true overwrite=true; -data.parquet 665 1 +data.parquet 678 1 + curl -sS -H x-databend-query-id:load-parquet -H 'X-Databend-SQL:insert into streaming_load_07(c3, c4, c2) values ('\''a'\'', ?, ?) from @_databend_load file_format = (type=parquet)' -F upload=@/tmp/streaming_load_07/data.parquet -u root: -XPUT http://localhost:8000/v1/streaming_load {"id":"load-parquet","stats":{"rows":1,"bytes":44}} <<<< From 0b448aa7ee5ceb05ab3af124a9457e7d901abbcc Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 20 Nov 2025 19:20:55 +0800 Subject: [PATCH 3/5] fix: tweak logic tests --- .../suites/base/09_fuse_engine/09_0006_func_fuse_history.test | 2 +- .../suites/ee/01_ee_system/01_0002_virtual_column.test | 2 +- .../query/index/08_ngram_index/08_0000_ngram_index_base.test | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test index e184963bab0cf..53909e0378624 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test @@ -34,7 +34,7 @@ insert into t values (2),(3) query III select block_count, row_count, index_size from fuse_snapshot('db_09_0006', 't') order by row_count desc limit 1 ---- -2 3 425 +2 3 434 query II select block_count, row_count from fuse_snapshot('db_09_0006', 't') order by row_count; diff --git a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test index e98bb103000a0..51a9fa5c14cf0 100644 --- a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test +++ b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test @@ -145,7 +145,7 @@ select row_count, virtual_column_size from fuse_block('test_virtual_column', 't2 query III select block_count, row_count, virtual_column_size from fuse_segment('test_virtual_column', 't2'); ---- -1 3 806 +1 3 833 query III select block_count, row_count, virtual_column_size from fuse_snapshot('test_virtual_column', 't2'); diff --git a/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test b/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test index 07c40a3c0f372..9ab5672f68a72 100644 --- a/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test +++ b/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test @@ -25,7 +25,7 @@ t1 1049509 1049509 1048617 query III select row_count, bloom_filter_size, ngram_index_size from fuse_block('test_gram_index', 't1') ---- -4 1049482 1048617 +4 1049509 1048617 query IIIII select block_count, row_count, index_size, bloom_index_size, ngram_index_size from fuse_segment('test_gram_index', 't1'); From a4ae319da21fc0dd72a001597d53f051257d2db0 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 20 Nov 2025 20:07:00 +0800 Subject: [PATCH 4/5] tweak logic tests --- .../suites/base/09_fuse_engine/09_0006_func_fuse_history.test | 2 +- .../query/index/08_ngram_index/08_0000_ngram_index_base.test | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test b/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test index 53909e0378624..fcc3e5e4b8ff3 100644 --- a/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test +++ b/tests/sqllogictests/suites/base/09_fuse_engine/09_0006_func_fuse_history.test @@ -52,7 +52,7 @@ query I select bloom_filter_size from fuse_block('db_09_0006', 't') order by bloom_filter_size ---- 0 -425 +434 statement ok create table t1(a int not null) row_per_block=3 diff --git a/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test b/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test index 9ab5672f68a72..5412fc6788cf5 100644 --- a/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test +++ b/tests/sqllogictests/suites/query/index/08_ngram_index/08_0000_ngram_index_base.test @@ -30,12 +30,12 @@ select row_count, bloom_filter_size, ngram_index_size from fuse_block('test_gram query IIIII select block_count, row_count, index_size, bloom_index_size, ngram_index_size from fuse_segment('test_gram_index', 't1'); ---- -1 4 1049482 1049482 1048617 +1 4 1049509 1049509 1048617 query IIIII select block_count, row_count, index_size, bloom_index_size, ngram_index_size from fuse_snapshot('test_gram_index', 't1'); ---- -1 4 1049482 1049482 1048617 +1 4 1049509 1049509 1048617 statement ok CREATE TABLE t2 (id int, content string, name string) From 6e9b84878692d53f759cd2da6f722d0a1b359304 Mon Sep 17 00:00:00 2001 From: dantengsky Date: Thu, 20 Nov 2025 20:50:00 +0800 Subject: [PATCH 5/5] fix: tweak logic tests --- .../suites/ee/01_ee_system/01_0002_virtual_column.test | 10 +++++----- .../standalone/explain/index/explain_ngram_index.test | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test index 51a9fa5c14cf0..1090f7511b0aa 100644 --- a/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test +++ b/tests/sqllogictests/suites/ee/01_ee_system/01_0002_virtual_column.test @@ -150,19 +150,19 @@ select block_count, row_count, virtual_column_size from fuse_segment('test_virtu query III select block_count, row_count, virtual_column_size from fuse_snapshot('test_virtual_column', 't2'); ---- -1 3 806 +1 3 833 query IITTIII select virtual_block_size, row_count, column_name, column_type, column_id, block_offset, bytes_compressed from fuse_virtual_column('test_virtual_column', 't2') ---- -806 3 val['a'] UInt64 NULL 3000000000 4 48 -806 3 val['b'] UInt64 NULL 3000000001 52 48 -806 3 val['c'] String NULL 3000000002 100 48 +833 3 val['a'] UInt64 NULL 3000000000 4 48 +833 3 val['b'] UInt64 NULL 3000000001 52 48 +833 3 val['c'] String NULL 3000000002 100 48 query IIIIII select block_count, row_count, bytes_uncompressed, bytes_compressed, index_size, virtual_block_count from fuse_segment('test_virtual_column', 't2') ---- -1 3 134 694 1235 1 +1 3 134 712 1271 1 statement ok insert into t2 values(4, '{"a":44,"b":4,"c":"value"}'), (5, '{"a":55,"b":5,"c":"bend"}'), (6, '6') diff --git a/tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test b/tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test index 3bde648dc0d6a..0023f239bc303 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test @@ -319,8 +319,8 @@ INSERT INTO t3 VALUES query II select block_size, bloom_filter_size, ngram_index_size from fuse_block('test_ngram_index_db', 't3'); ---- -209 867 NULL -240 867 NULL +209 894 NULL +240 894 NULL statement ok CREATE NGRAM INDEX idx1 ON t3(content1, content2) gram_size = 5 bloom_size = 1048576 @@ -331,8 +331,8 @@ REFRESH NGRAM INDEX idx1 ON t3 query III select block_size, bloom_filter_size, ngram_index_size from fuse_block('test_ngram_index_db', 't3'); ---- -209 2098525 2097234 -240 2098525 2097234 +209 2098570 2097234 +240 2098570 2097234 query ITT SELECT * FROM t3 WHERE content1 like '%speak%'