Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 36 additions & 80 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 27 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -89,30 +89,30 @@ version = "53.1.0"
#
# See for more details: https://github.com/rust-lang/cargo/issues/11329
apache-avro = { version = "0.21", default-features = false }
arrow = { version = "58.3.0", features = [
arrow = { version = "59.0.0", features = [
"prettyprint",
"chrono-tz",
] }
arrow-avro = { version = "58.3.0", default-features = false, features = [
arrow-avro = { version = "59.0.0", default-features = false, features = [
"deflate",
"snappy",
"zstd",
"bzip2",
"xz",
] }
arrow-buffer = { version = "58.3.0", default-features = false }
arrow-data = { version = "58.3.0", default-features = false }
arrow-flight = { version = "58.3.0", features = [
arrow-buffer = { version = "59.0.0", default-features = false }
arrow-data = { version = "59.0.0", default-features = false }
arrow-flight = { version = "59.0.0", features = [
"flight-sql-experimental",
] }
# Both codecs are required here to make sure that code paths like
# file-spilling have access to all compression codecs.
arrow-ipc = { version = "58.3.0", default-features = false, features = [
arrow-ipc = { version = "59.0.0", default-features = false, features = [
"lz4",
"zstd",
] }
arrow-ord = { version = "58.3.0", default-features = false }
arrow-schema = { version = "58.3.0", default-features = false }
arrow-ord = { version = "59.0.0", default-features = false }
arrow-schema = { version = "59.0.0", default-features = false }
async-trait = "0.1.89"
bigdecimal = "0.4.8"
bytes = "1.11"
Expand Down Expand Up @@ -178,7 +178,7 @@ memchr = "2.8.0"
num-traits = { version = "0.2" }
object_store = { version = "0.13.2", default-features = false }
parking_lot = "0.12"
parquet = { version = "58.3.0", default-features = false, features = [
parquet = { version = "59.0.0", default-features = false, features = [
"arrow",
"async",
"object_store",
Expand Down Expand Up @@ -291,3 +291,21 @@ debug = false
debug-assertions = false
strip = "debuginfo"
incremental = false


## Temporary arrow-rs patch until 59.0.0 is released

[patch.crates-io]
arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-avro = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" }
12 changes: 6 additions & 6 deletions datafusion-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,9 +613,9 @@ mod tests {
+-----------------------------------+-----------------+---------------------+------+------------------+
| filename | file_size_bytes | metadata_size_bytes | hits | extra |
+-----------------------------------+-----------------+---------------------+------+------------------+
| alltypes_plain.parquet | 1851 | 8882 | 2 | page_index=false |
| alltypes_tiny_pages.parquet | 454233 | 269074 | 2 | page_index=true |
| lz4_raw_compressed_larger.parquet | 380836 | 1339 | 2 | page_index=false |
| alltypes_plain.parquet | 1851 | 8794 | 2 | page_index=false |
Copy link
Copy Markdown
Contributor Author

@alamb alamb Jun 4, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this changed (the in-memory size is smaller) because of the change in representation from Compression to CompressionCodec in this PR.

Previously the type was Compression, which also carries the compression level: ZSTD(ZstdLevel), GZIP(GzipLevel), BROTLI(BrotliLevel). ZstdLevel(i32), GzipLevel(u32), and BrotliLevel(u32) are all 4-byte wrappers, so Compression = 4-byte discriminant + 4-byte level = 8 bytes.

It is now a fieldless enum, CompressionCodec, which is 1 byte.

| alltypes_tiny_pages.parquet | 454233 | 268970 | 2 | page_index=true |
| lz4_raw_compressed_larger.parquet | 380836 | 1331 | 2 | page_index=false |
+-----------------------------------+-----------------+---------------------+------+------------------+
");

Expand Down Expand Up @@ -644,9 +644,9 @@ mod tests {
+-----------------------------------+-----------------+---------------------+------+------------------+
| filename | file_size_bytes | metadata_size_bytes | hits | extra |
+-----------------------------------+-----------------+---------------------+------+------------------+
| alltypes_plain.parquet | 1851 | 8882 | 5 | page_index=false |
| alltypes_tiny_pages.parquet | 454233 | 269074 | 2 | page_index=true |
| lz4_raw_compressed_larger.parquet | 380836 | 1339 | 3 | page_index=false |
| alltypes_plain.parquet | 1851 | 8794 | 5 | page_index=false |
| alltypes_tiny_pages.parquet | 454233 | 268970 | 2 | page_index=true |
| lz4_raw_compressed_larger.parquet | 380836 | 1331 | 3 | page_index=false |
+-----------------------------------+-----------------+---------------------+------+------------------+
");

Expand Down
Loading
Loading