diff --git a/Cargo.lock b/Cargo.lock index cdfd97abcd166..795ad15c677b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -155,9 +155,8 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "378530e55cd479eda3c14eb345310799717e6f76d0c332041e8487022166b471" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-arith", "arrow-array", @@ -178,9 +177,8 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0ab212d2c1886e802f51c5212d78ebbcbb0bec980fff9dadc1eb8d45cd0b738" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-buffer", @@ -192,9 +190,8 @@ dependencies = [ [[package]] name = "arrow-array" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfd33d3e92f207444098c75b42de99d329562be0cf686b307b097cc52b4e999e" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "ahash", "arrow-buffer", @@ -211,9 +208,8 @@ dependencies = [ [[package]] name = "arrow-avro" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "049230728cd6e093088c8d231b4beede184e35cad7777c1505c0d5a8571f4376" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-buffer", @@ -235,9 +231,8 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6cd424c2693bcdbc150d843dc9d4d137dd2de4782ce6df491ad11a3a0416c0" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "bytes", "half", @@ -247,9 +242,8 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c5aefb56a2c02e9e2b30746241058b85f8983f0fcff2ba0c6d09006e1cded7f" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-buffer", @@ -269,9 +263,8 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94e8cf7e517657a52b91ea1263acf38c4ca62a84655d72458a3359b12ab97de" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-cast", @@ -284,9 +277,8 @@ dependencies = [ [[package]] name = "arrow-data" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c88210023a2bfee1896af366309a3028fc3bcbd6515fa29a7990ee1baa08ee0" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-buffer", "arrow-schema", @@ -297,9 +289,8 @@ dependencies = [ [[package]] name = "arrow-flight" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28abfe8bf9f124e5fc83b334af4fa58f8d0323ad25312ccb2d1da50178415704" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-arith", "arrow-array", @@ -325,9 +316,8 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "238438f0834483703d88896db6fe5a7138b2230debc31b34c0336c2996e3c64f" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-buffer", @@ -341,9 +331,8 @@ dependencies = [ [[package]] name = "arrow-json" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "205ca2119e6d679d5c133c6f30e68f027738d95ed948cf77677ea69c7800036b" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-buffer", @@ -366,9 +355,8 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bffd8fd2579286a5d63bac898159873e5094a79009940bcb42bbfce4f19f1d0" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-buffer", @@ -379,9 +367,8 @@ dependencies = [ [[package]] name = "arrow-row" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bab5994731204603c73ba69267616c50f80780774c6bb0476f1f830625115e0c" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-buffer", @@ -392,9 +379,8 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f633dbfdf39c039ada1bf9e34c694816eb71fbb7dc78f613993b7245e078a1ed" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "bitflags", "serde", @@ -404,9 +390,8 @@ dependencies = [ [[package]] name = "arrow-select" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cd065c54172ac787cf3f2f8d4107e0d3fdc26edba76fdf4f4cc170258942222" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "ahash", "arrow-array", @@ -418,9 +403,8 @@ dependencies = [ [[package]] name = "arrow-string" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29dd7cda3ab9692f43a2e4acc444d760cc17b12bb6d8232ddf64e9bab7c06b42" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "arrow-array", "arrow-buffer", @@ -3717,12 +3701,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "integer-encoding" -version = "3.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" - [[package]] name = "ipnet" version = "2.12.0" @@ -4376,15 +4354,6 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" -[[package]] -name = "ordered-float" -version = "2.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" -dependencies = [ - "num-traits", -] - [[package]] name = "outref" version = "0.5.2" @@ -4432,9 +4401,8 @@ dependencies = [ [[package]] name = "parquet" -version = "58.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dafa7d01085b62a47dd0c1829550a0a36710ea9c4fe358a05a85477cec8a908" +version = "59.0.0" +source = "git+https://github.com/apache/arrow-rs.git?rev=dc765326433b1e2efcf0fb91168b95fb10314ebd#dc765326433b1e2efcf0fb91168b95fb10314ebd" dependencies = [ "ahash", "arrow-array", @@ -4461,7 +4429,6 @@ dependencies = [ "seq-macro", "simdutf8", "snap", - "thrift", "tokio", "twox-hash", "zstd", @@ -4821,7 +4788,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" dependencies = [ "heck", - "itertools 0.14.0", + "itertools 0.13.0", "log", "multimap", "petgraph", @@ -4840,7 +4807,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.13.0", "proc-macro2", "quote", "syn 2.0.117", @@ -6134,17 +6101,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" -dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float", -] - [[package]] name = "time" version = "0.3.47" diff --git a/Cargo.toml b/Cargo.toml index ff5d3afcf48f5..54cbf9d18185e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -89,30 +89,30 @@ version = "53.1.0" # # See for more details: https://github.com/rust-lang/cargo/issues/11329 apache-avro = { version = "0.21", default-features = false } -arrow = { version = "58.3.0", features = [ +arrow = { version = "59.0.0", features = [ "prettyprint", "chrono-tz", ] } -arrow-avro = { version = "58.3.0", default-features = false, features = [ +arrow-avro = { version = "59.0.0", default-features = false, features = [ "deflate", "snappy", "zstd", "bzip2", "xz", ] } -arrow-buffer = { version = "58.3.0", default-features = false } -arrow-data = { version = "58.3.0", default-features = false } -arrow-flight = { version = "58.3.0", features = [ +arrow-buffer = { version = "59.0.0", default-features = false } +arrow-data = { version = "59.0.0", default-features = false } +arrow-flight = { version = "59.0.0", features = [ "flight-sql-experimental", ] } # Both codecs are required here to make sure that code paths like # file-spilling have access to all compression codecs. -arrow-ipc = { version = "58.3.0", default-features = false, features = [ +arrow-ipc = { version = "59.0.0", default-features = false, features = [ "lz4", "zstd", ] } -arrow-ord = { version = "58.3.0", default-features = false } -arrow-schema = { version = "58.3.0", default-features = false } +arrow-ord = { version = "59.0.0", default-features = false } +arrow-schema = { version = "59.0.0", default-features = false } async-trait = "0.1.89" bigdecimal = "0.4.8" bytes = "1.11" @@ -178,7 +178,7 @@ memchr = "2.8.0" num-traits = { version = "0.2" } object_store = { version = "0.13.2", default-features = false } parking_lot = "0.12" -parquet = { version = "58.3.0", default-features = false, features = [ +parquet = { version = "59.0.0", default-features = false, features = [ "arrow", "async", "object_store", @@ -291,3 +291,21 @@ debug = false debug-assertions = false strip = "debuginfo" incremental = false + + +## Temporary arrow-rs patch until 59.0.0 is released + +[patch.crates-io] +arrow = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-array = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-avro = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-buffer = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-cast = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-data = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-ipc = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-schema = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-select = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-string = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-ord = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +arrow-flight = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } +parquet = { git = "https://github.com/apache/arrow-rs.git", rev = "dc765326433b1e2efcf0fb91168b95fb10314ebd" } \ No newline at end of file diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 935bf0a9744dd..0d8ada1367826 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -613,9 +613,9 @@ mod tests { +-----------------------------------+-----------------+---------------------+------+------------------+ | filename | file_size_bytes | metadata_size_bytes | hits | extra | +-----------------------------------+-----------------+---------------------+------+------------------+ - | alltypes_plain.parquet | 1851 | 8882 | 2 | page_index=false | - | alltypes_tiny_pages.parquet | 454233 | 269074 | 2 | page_index=true | - | lz4_raw_compressed_larger.parquet | 380836 | 1339 | 2 | page_index=false | + | alltypes_plain.parquet | 1851 | 8794 | 2 | page_index=false | + | alltypes_tiny_pages.parquet | 454233 | 268970 | 2 | page_index=true | + | lz4_raw_compressed_larger.parquet | 380836 | 1331 | 2 | page_index=false | +-----------------------------------+-----------------+---------------------+------+------------------+ "); @@ -644,9 +644,9 @@ mod tests { +-----------------------------------+-----------------+---------------------+------+------------------+ | filename | file_size_bytes | metadata_size_bytes | hits | extra | +-----------------------------------+-----------------+---------------------+------+------------------+ - | alltypes_plain.parquet | 1851 | 8882 | 5 | page_index=false | - | alltypes_tiny_pages.parquet | 454233 | 269074 | 2 | page_index=true | - | lz4_raw_compressed_larger.parquet | 380836 | 1339 | 3 | page_index=false | + | alltypes_plain.parquet | 1851 | 8794 | 5 | page_index=false | + | alltypes_tiny_pages.parquet | 454233 | 268970 | 2 | page_index=true | + | lz4_raw_compressed_larger.parquet | 380836 | 1331 | 3 | page_index=false | +-----------------------------------+-----------------+---------------------+------+------------------+ "); diff --git a/datafusion/common/src/file_options/mod.rs b/datafusion/common/src/file_options/mod.rs index 5d2abd23172ed..97b4a44f03223 100644 --- a/datafusion/common/src/file_options/mod.rs +++ b/datafusion/common/src/file_options/mod.rs @@ -114,14 +114,14 @@ mod tests { properties .bloom_filter_properties(&ColumnPath::from("")) .expect("expected bloom properties!") - .fpp, + .fpp(), 0.123 ); assert_eq!( properties .bloom_filter_properties(&ColumnPath::from("")) .expect("expected bloom properties!") - .ndv, + .ndv(), 123 ); @@ -242,7 +242,7 @@ mod tests { properties .bloom_filter_properties(&col1) .expect("expected bloom properties!") - .fpp, + .fpp(), 0.123 ); @@ -250,7 +250,7 @@ mod tests { properties .bloom_filter_properties(&col2_nested) .expect("expected bloom properties!") - .fpp, + .fpp(), 0.456 ); @@ -258,7 +258,7 @@ mod tests { properties .bloom_filter_properties(&col1) .expect("expected bloom properties!") - .ndv, + .ndv(), 123 ); @@ -266,7 +266,7 @@ mod tests { properties .bloom_filter_properties(&col2_nested) .expect("expected bloom properties!") - .ndv, + .ndv(), 456 ); diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index d0a3cecdb857a..633948e032bff 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -157,8 +157,8 @@ impl TryFrom<&TableParquetOptions> for WriterPropertiesBuilder { } if let Some(bloom_filter_ndv) = options.bloom_filter_ndv { - builder = - builder.set_column_bloom_filter_ndv(path.clone(), bloom_filter_ndv); + builder = builder + .set_column_bloom_filter_max_ndv(path.clone(), bloom_filter_ndv); } } @@ -271,7 +271,7 @@ impl ParquetOptions { builder = builder.set_bloom_filter_fpp(*bloom_filter_fpp); }; if let Some(bloom_filter_ndv) = bloom_filter_ndv { - builder = builder.set_bloom_filter_ndv(*bloom_filter_ndv); + builder = builder.set_bloom_filter_max_ndv(*bloom_filter_ndv); }; if let Some(dictionary_enabled) = dictionary_enabled { builder = builder.set_dictionary_enabled(*dictionary_enabled); @@ -530,8 +530,8 @@ mod tests { } .into(), ), - bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp), - bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv), + bloom_filter_fpp: bloom_filter_default_props.map(|p| p.fpp()), + bloom_filter_ndv: bloom_filter_default_props.map(|p| p.ndv()), } } @@ -823,10 +823,12 @@ mod tests { ); assert_eq!( default_writer_props.bloom_filter_properties(&"default".into()), - Some(&BloomFilterProperties { - fpp: 0.42, - ndv: DEFAULT_BLOOM_FILTER_NDV - }), + Some( + &BloomFilterProperties::builder() + .with_fpp(0.42) + .with_max_ndv(DEFAULT_BLOOM_FILTER_NDV) + .build() + ), "should have only the fpp set, and the ndv at default", ); } @@ -910,7 +912,7 @@ mod tests { // the WriterProperties::default, with only ndv set let default_writer_props = WriterProperties::builder() .set_bloom_filter_enabled(true) - .set_bloom_filter_ndv(42) + .set_bloom_filter_max_ndv(42) .build(); assert_eq!( @@ -920,10 +922,12 @@ mod tests { ); assert_eq!( default_writer_props.bloom_filter_properties(&"default".into()), - Some(&BloomFilterProperties { - fpp: DEFAULT_BLOOM_FILTER_FPP, - ndv: 42 - }), + Some( + &BloomFilterProperties::builder() + .with_fpp(DEFAULT_BLOOM_FILTER_FPP) + .with_max_ndv(42) + .build() + ), "should have only the ndv set, and the fpp at default", ); } diff --git a/datafusion/core/tests/datasource/object_store_access.rs b/datafusion/core/tests/datasource/object_store_access.rs index 83b84f6f9284e..25150ae284cc0 100644 --- a/datafusion/core/tests/datasource/object_store_access.rs +++ b/datafusion/core/tests/datasource/object_store_access.rs @@ -904,7 +904,7 @@ async fn query_single_parquet_file_with_single_predicate() { RequestCountingObjectStore() Total Requests: 2 - GET (opts) path=parquet_table.parquet head=true - - GET (ranges) path=parquet_table.parquet ranges=1064-1481,1481-1594,1594-2011,2011-2124 + - GET (ranges) path=parquet_table.parquet ranges=1064-1594,1594-2124 " ); } @@ -928,8 +928,8 @@ async fn query_single_parquet_file_multi_row_groups_multiple_predicates() { RequestCountingObjectStore() Total Requests: 3 - GET (opts) path=parquet_table.parquet head=true - - GET (ranges) path=parquet_table.parquet ranges=4-421,421-534,534-951,951-1064 - - GET (ranges) path=parquet_table.parquet ranges=1064-1481,1481-1594,1594-2011,2011-2124 + - GET (ranges) path=parquet_table.parquet ranges=4-534,534-1064 + - GET (ranges) path=parquet_table.parquet ranges=1064-1594,1594-2124 " ); } diff --git a/datafusion/core/tests/extension_types/pretty_printing.rs b/datafusion/core/tests/extension_types/pretty_printing.rs index c0796887b8b6e..f097b5bec97fc 100644 --- a/datafusion/core/tests/extension_types/pretty_printing.rs +++ b/datafusion/core/tests/extension_types/pretty_printing.rs @@ -40,10 +40,16 @@ async fn create_test_table() -> Result { // define data. let batch = RecordBatch::try_new( schema, - vec![Arc::new(FixedSizeBinaryArray::from(vec![ - &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], - &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 5, 6], - ]))], + vec![Arc::new( + FixedSizeBinaryArray::try_from_iter( + vec![ + &[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 5, 6], + ] + .into_iter(), + ) + .unwrap(), + )], )?; let state = SessionStateBuilder::default() diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index 0e936a79ebe9f..12296f8498d9f 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -726,11 +726,11 @@ fn make_bytearray_batch( let name: StringArray = std::iter::repeat_n(Some(name), num_rows).collect(); let service_string: StringArray = string_values.iter().map(Some).collect(); let service_binary: BinaryArray = binary_values.iter().map(Some).collect(); - let service_fixedsize: FixedSizeBinaryArray = fixedsize_values - .iter() - .map(|value| Some(value.as_slice())) - .collect::>() - .into(); + let service_fixedsize = FixedSizeBinaryArray::try_from_sparse_iter_with_size( + fixedsize_values.iter().map(|value| Some(value.as_slice())), + 3, + ) + .unwrap(); let service_large_binary: LargeBinaryArray = large_binary_values.iter().map(Some).collect(); diff --git a/datafusion/datasource-parquet/src/row_group_filter.rs b/datafusion/datasource-parquet/src/row_group_filter.rs index 07f4fe92cf308..1e9b0636e59e9 100644 --- a/datafusion/datasource-parquet/src/row_group_filter.rs +++ b/datafusion/datasource-parquet/src/row_group_filter.rs @@ -955,10 +955,7 @@ mod tests { let schema = Arc::new(Schema::new(vec![Field::new("c1", Decimal128(9, 2), false)])); let field = PrimitiveTypeField::new("c1", PhysicalType::INT32) - .with_logical_type(LogicalType::Decimal { - scale: 2, - precision: 9, - }) + .with_logical_type(LogicalType::decimal(2, 9)) .with_scale(2) .with_precision(9); let schema_descr = get_test_schema_descr(vec![field]); @@ -1023,10 +1020,7 @@ mod tests { Arc::new(Schema::new(vec![Field::new("c1", Decimal128(9, 0), false)])); let field = PrimitiveTypeField::new("c1", PhysicalType::INT32) - .with_logical_type(LogicalType::Decimal { - scale: 0, - precision: 9, - }) + .with_logical_type(LogicalType::decimal(0, 9)) .with_scale(0) .with_precision(9); let schema_descr = get_test_schema_descr(vec![field]); @@ -1118,10 +1112,7 @@ mod tests { false, )])); let field = PrimitiveTypeField::new("c1", PhysicalType::INT64) - .with_logical_type(LogicalType::Decimal { - scale: 2, - precision: 18, - }) + .with_logical_type(LogicalType::decimal(2, 18)) .with_scale(2) .with_precision(18); let schema_descr = get_test_schema_descr(vec![field]); @@ -1176,10 +1167,7 @@ mod tests { false, )])); let field = PrimitiveTypeField::new("c1", PhysicalType::FIXED_LEN_BYTE_ARRAY) - .with_logical_type(LogicalType::Decimal { - scale: 2, - precision: 18, - }) + .with_logical_type(LogicalType::decimal(2, 18)) .with_scale(2) .with_precision(18) .with_byte_len(16); @@ -1254,10 +1242,7 @@ mod tests { false, )])); let field = PrimitiveTypeField::new("c1", PhysicalType::BYTE_ARRAY) - .with_logical_type(LogicalType::Decimal { - scale: 2, - precision: 18, - }) + .with_logical_type(LogicalType::decimal(2, 18)) .with_scale(2) .with_precision(18) .with_byte_len(16); diff --git a/datafusion/physical-expr-common/src/utils.rs b/datafusion/physical-expr-common/src/utils.rs index e469885f83316..117da23df2f3e 100644 --- a/datafusion/physical-expr-common/src/utils.rs +++ b/datafusion/physical-expr-common/src/utils.rs @@ -614,11 +614,9 @@ mod tests { #[test] fn scatter_fixed_size_binary_test() -> Result<()> { - let truthy = Arc::new(FixedSizeBinaryArray::from(vec![ - &[1u8, 2][..], - &[3, 4][..], - &[5, 6][..], - ])); + let truthy = Arc::new(FixedSizeBinaryArray::try_from_iter( + vec![&[1u8, 2][..], &[3, 4][..], &[5, 6][..]].into_iter(), + )?); let mask = BooleanArray::from(vec![true, false, true, false, true]); let result = scatter(&mask, truthy.as_ref())?; diff --git a/datafusion/physical-expr/benches/in_list_strategy.rs b/datafusion/physical-expr/benches/in_list_strategy.rs index 5c4922fdcf8a9..3eff1f5cf3dff 100644 --- a/datafusion/physical-expr/benches/in_list_strategy.rs +++ b/datafusion/physical-expr/benches/in_list_strategy.rs @@ -993,7 +993,7 @@ fn bench_fixed_size_binary_inner( .collect(); let refs: Vec<&[u8]> = values.iter().map(|v| v.as_slice()).collect(); - let array = FixedSizeBinaryArray::from(refs); + let array = FixedSizeBinaryArray::try_from_iter(refs.into_iter()).unwrap(); let schema = Schema::new(vec![Field::new("a", array.data_type().clone(), true)]); let exprs: Vec<_> = haystack diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs b/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs index c4377b3189ff7..b1fdf3ddabb5a 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/tests.rs @@ -175,7 +175,7 @@ fn build_fixed_size_binary_table( let batch = RecordBatch::try_new( Arc::new(schema), vec![ - Arc::new(FixedSizeBinaryArray::from(a.1.clone())), + Arc::new(FixedSizeBinaryArray::try_from_iter(a.1.iter().copied()).unwrap()), Arc::new(Int32Array::from(b.1.clone())), Arc::new(Int32Array::from(c.1.clone())), ], diff --git a/datafusion/spark/src/function/hash/xxhash64.rs b/datafusion/spark/src/function/hash/xxhash64.rs index 5dca47bcb8984..9d02a51b2217e 100644 --- a/datafusion/spark/src/function/hash/xxhash64.rs +++ b/datafusion/spark/src/function/hash/xxhash64.rs @@ -363,12 +363,17 @@ mod tests { #[test] fn test_xxhash64_fixed_size_binary() { - let array = FixedSizeBinaryArray::from(vec![ - Some(&[0x01, 0x02, 0x03, 0x04][..]), - Some(&[0x05, 0x06, 0x07, 0x08][..]), - None, - Some(&[0x00, 0x00, 0x00, 0x00][..]), - ]); + let array = FixedSizeBinaryArray::try_from_sparse_iter_with_size( + vec![ + Some(&[0x01, 0x02, 0x03, 0x04][..]), + Some(&[0x05, 0x06, 0x07, 0x08][..]), + None, + Some(&[0x00, 0x00, 0x00, 0x00][..]), + ] + .into_iter(), + 4, + ) + .unwrap(); let array_ref: ArrayRef = Arc::new(array); let mut hashes = vec![DEFAULT_SEED; 4];