diff --git a/.asf.yaml b/.asf.yaml index 43c9250c2b826..805bb52456f40 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -65,6 +65,9 @@ github: branch-52: required_pull_request_reviews: required_approving_review_count: 1 + branch-53: + required_pull_request_reviews: + required_approving_review_count: 1 pull_requests: # enable updating head branches of pull requests allow_update_branch: true diff --git a/Cargo.lock b/Cargo.lock index 23670a7877041..d32b2e25019a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1715,7 +1715,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-schema", @@ -1789,7 +1789,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "clap", @@ -1814,7 +1814,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -1837,7 +1837,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -1859,7 +1859,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -1890,7 +1890,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "apache-avro", @@ -1918,7 +1918,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "52.1.0" +version = "53.0.0" dependencies = [ "futures", "log", @@ -1927,7 +1927,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-compression", @@ -1962,7 +1962,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -1985,7 +1985,7 @@ dependencies = [ [[package]] 
name = "datafusion-datasource-avro" -version = "52.1.0" +version = "53.0.0" dependencies = [ "apache-avro", "arrow", @@ -2004,7 +2004,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2025,7 +2025,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2047,7 +2047,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2079,11 +2079,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "52.1.0" +version = "53.0.0" [[package]] name = "datafusion-examples" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2124,7 +2124,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2147,7 +2147,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2171,7 +2171,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2182,7 +2182,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "52.1.0" +version = "53.0.0" dependencies = [ "abi_stable", "arrow", @@ -2216,7 +2216,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2250,7 +2250,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2272,7 +2272,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ 
"ahash", "arrow", @@ -2285,7 +2285,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2310,7 +2310,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2324,7 +2324,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "criterion", @@ -2341,7 +2341,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2349,7 +2349,7 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "52.1.0" +version = "53.0.0" dependencies = [ "datafusion-doc", "quote", @@ -2358,7 +2358,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2385,7 +2385,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2412,7 +2412,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2425,7 +2425,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2440,7 +2440,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2460,7 +2460,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "52.1.0" +version = "53.0.0" dependencies = [ "ahash", "arrow", @@ -2497,7 +2497,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "52.1.0" 
+version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2535,7 +2535,7 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2547,7 +2547,7 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2565,7 +2565,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "52.1.0" +version = "53.0.0" dependencies = [ "async-trait", "datafusion-common", @@ -2577,7 +2577,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2603,7 +2603,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2629,7 +2629,7 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "52.1.0" +version = "53.0.0" dependencies = [ "arrow", "async-trait", @@ -2660,7 +2660,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "52.1.0" +version = "53.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2681,7 +2681,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "52.1.0" +version = "53.0.0" dependencies = [ "bytes", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 3bcf17d8ed657..c09225ba6d839 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,7 @@ repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) rust-version = "1.88.0" # Define DataFusion version -version = "52.1.0" +version = "53.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -112,43 +112,43 @@ chrono = { version = "0.4.44", default-features = false } criterion = "0.8" ctor = "0.6.3" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", 
version = "52.1.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "52.1.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "52.1.0" } -datafusion-common = { path = "datafusion/common", version = "52.1.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "52.1.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "52.1.0", default-features = false } -datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "52.1.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "52.1.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "52.1.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "52.1.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "52.1.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "52.1.0" } -datafusion-execution = { path = "datafusion/execution", version = "52.1.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "52.1.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "52.1.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "52.1.0" } -datafusion-functions = { path = "datafusion/functions", version = "52.1.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "52.1.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "52.1.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "52.1.0", default-features = false } -datafusion-functions-table = { path = "datafusion/functions-table", version = "52.1.0" } 
-datafusion-functions-window = { path = "datafusion/functions-window", version = "52.1.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "52.1.0" } -datafusion-macros = { path = "datafusion/macros", version = "52.1.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "52.1.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "52.1.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "52.1.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "52.1.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "52.1.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "52.1.0" } -datafusion-proto = { path = "datafusion/proto", version = "52.1.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "52.1.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "52.1.0" } -datafusion-session = { path = "datafusion/session", version = "52.1.0" } -datafusion-spark = { path = "datafusion/spark", version = "52.1.0" } -datafusion-sql = { path = "datafusion/sql", version = "52.1.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "52.1.0" } +datafusion = { path = "datafusion/core", version = "53.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "53.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "53.0.0" } +datafusion-common = { path = "datafusion/common", version = "53.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "53.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "53.0.0", default-features = false } 
+datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "53.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "53.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "53.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "53.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "53.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "53.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "53.0.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "53.0.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "53.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "53.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "53.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "53.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "53.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "53.0.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", version = "53.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "53.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "53.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "53.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "53.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "53.0.0", default-features = false } 
+datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "53.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "53.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "53.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "53.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "53.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "53.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "53.0.0" } +datafusion-session = { path = "datafusion/session", version = "53.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "53.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "53.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "53.0.0" } doc-comment = "0.3" env_logger = "0.11" diff --git a/dev/changelog/53.0.0.md b/dev/changelog/53.0.0.md new file mode 100644 index 0000000000000..91306c7f49a6d --- /dev/null +++ b/dev/changelog/53.0.0.md @@ -0,0 +1,602 @@ + + +# Apache DataFusion 53.0.0 Changelog + +This release consists of 447 commits from 105 contributors. See credits at the end of this changelog for more information. + +See the [upgrade guide](https://datafusion.apache.org/library-user-guide/upgrading.html) for information on how to upgrade from previous versions. 
+ +**Breaking changes:** + +- Allow logical optimizer to be run without evaluating now() & refactor SimplifyInfo [#19505](https://github.com/apache/datafusion/pull/19505) (adriangb) +- Make default ListingFilesCache table scoped [#19616](https://github.com/apache/datafusion/pull/19616) (jizezhang) +- chore(deps): Update sqlparser to 0.60 [#19672](https://github.com/apache/datafusion/pull/19672) (Standing-Man) +- Do not require mut in memory reservation methods [#19759](https://github.com/apache/datafusion/pull/19759) (gabotechs) +- refactor: make PhysicalExprAdapterFactory::create fallible [#20017](https://github.com/apache/datafusion/pull/20017) (niebayes) +- Add `ScalarValue::RunEndEncoded` variant [#19895](https://github.com/apache/datafusion/pull/19895) (Jefffrey) +- minor: remove unused crypto functions & narrow public API [#20045](https://github.com/apache/datafusion/pull/20045) (Jefffrey) +- Wrap immutable plan parts into Arc (make creating `ExecutionPlan`s less costly) [#19893](https://github.com/apache/datafusion/pull/19893) (askalt) +- feat: Support planning subqueries with OuterReferenceColumn belongs to non-adjacent outer relations [#19930](https://github.com/apache/datafusion/pull/19930) (mkleen) +- Remove the statistics() api in execution plan [#20319](https://github.com/apache/datafusion/pull/20319) (xudong963) +- Remove recursive const check in `simplify_const_expr` [#20234](https://github.com/apache/datafusion/pull/20234) (AdamGS) +- Cache `PlanProperties`, add fast-path for `with_new_children` [#19792](https://github.com/apache/datafusion/pull/19792) (askalt) + +**Performance related:** + +- perf: optimize `HashTableLookupExpr::evaluate` [#19602](https://github.com/apache/datafusion/pull/19602) (UBarney) +- perf: Improve performance of `split_part` [#19570](https://github.com/apache/datafusion/pull/19570) (andygrove) +- Optimize `Nullstate` / accumulators [#19625](https://github.com/apache/datafusion/pull/19625) (Dandandan) +- perf: optimize 
`NthValue` when `ignore_nulls` is true [#19496](https://github.com/apache/datafusion/pull/19496) (mzabaluev) +- Optimize `concat/concat_ws` scalar path by pre-allocating memory [#19547](https://github.com/apache/datafusion/pull/19547) (lyne7-sc) +- perf: optimize left function by eliminating double chars() iteration [#19571](https://github.com/apache/datafusion/pull/19571) (viirya) +- perf: Optimize floor and ceil scalar performance [#19752](https://github.com/apache/datafusion/pull/19752) (kumarUjjawal) +- perf: improve performance of `spark hex` function [#19738](https://github.com/apache/datafusion/pull/19738) (lyne7-sc) +- perf: Optimize initcap scalar performance [#19776](https://github.com/apache/datafusion/pull/19776) (kumarUjjawal) +- Row group limit pruning for row groups that entirely match predicates [#18868](https://github.com/apache/datafusion/pull/18868) (xudong963) +- perf: Optimize trunc scalar performance [#19788](https://github.com/apache/datafusion/pull/19788) (kumarUjjawal) +- perf: optimize `spark_hex` dictionary path by avoiding dictionary expansion [#19832](https://github.com/apache/datafusion/pull/19832) (lyne7-sc) +- Add FilterExecBuilder to avoid recomputing properties multiple times [#19854](https://github.com/apache/datafusion/pull/19854) (adriangb) +- perf: Optimize round scalar performance [#19831](https://github.com/apache/datafusion/pull/19831) (kumarUjjawal) +- perf: Optimize signum scalar performance with fast path [#19871](https://github.com/apache/datafusion/pull/19871) (kumarUjjawal) +- perf: Optimize scalar performance for cot [#19888](https://github.com/apache/datafusion/pull/19888) (kumarUjjawal) +- perf: Optimize scalar fast path for iszero [#19919](https://github.com/apache/datafusion/pull/19919) (kumarUjjawal) +- Misc hash / hash aggregation performance improvements [#19910](https://github.com/apache/datafusion/pull/19910) (Dandandan) +- perf: Optimize scalar path for ascii function 
[#19951](https://github.com/apache/datafusion/pull/19951) (kumarUjjawal) +- perf: Optimize factorial scalar path [#19949](https://github.com/apache/datafusion/pull/19949) (kumarUjjawal) +- Speedup statistics_from_parquet_metadata [#20004](https://github.com/apache/datafusion/pull/20004) (Dandandan) +- perf: improve performance of `array_remove`, `array_remove_n` and `array_remove_all` functions [#19996](https://github.com/apache/datafusion/pull/19996) (lyne7-sc) +- perf: Optimize ArrowBytesViewMap with direct view access [#19975](https://github.com/apache/datafusion/pull/19975) (Tushar7012) +- perf: Optimize repeat function for scalar and array fast [#19976](https://github.com/apache/datafusion/pull/19976) (kumarUjjawal) +- perf: Push down join key filters for LEFT/RIGHT/ANTI joins [#19918](https://github.com/apache/datafusion/pull/19918) (nuno-faria) +- perf: Optimize scalar path for chr function [#20073](https://github.com/apache/datafusion/pull/20073) (kumarUjjawal) +- perf: improve performance of `array_repeat` function [#20049](https://github.com/apache/datafusion/pull/20049) (lyne7-sc) +- perf: optimise right for byte access and StringView [#20069](https://github.com/apache/datafusion/pull/20069) (theirix) +- Optimize `PhysicalExprSimplifier` [#20111](https://github.com/apache/datafusion/pull/20111) (AdamGS) +- Improve performance of `CASE WHEN x THEN y ELSE NULL` expressions [#20097](https://github.com/apache/datafusion/pull/20097) (pepijnve) +- perf: Optimize scalar fast path of to_hex function [#20112](https://github.com/apache/datafusion/pull/20112) (kumarUjjawal) +- perf: Optimize scalar fast path & write() encoding for sha2 [#20116](https://github.com/apache/datafusion/pull/20116) (kumarUjjawal) +- perf: improve performance of `array_union`/`array_intersect` with batched row conversion [#20243](https://github.com/apache/datafusion/pull/20243) (lyne7-sc) +- perf: various optimizations to eliminate branch misprediction in hash_utils 
[#20168](https://github.com/apache/datafusion/pull/20168) (notashes) +- perf: Optimize strpos() for ASCII-only inputs [#20295](https://github.com/apache/datafusion/pull/20295) (neilconway) +- perf: Optimize compare_element_to_list [#20323](https://github.com/apache/datafusion/pull/20323) (neilconway) +- perf: Optimize replace() fastpath by avoiding alloc [#20344](https://github.com/apache/datafusion/pull/20344) (neilconway) +- perf: optimize `array_distinct` with batched row conversion [#20364](https://github.com/apache/datafusion/pull/20364) (lyne7-sc) +- perf: Optimize scalar fast path of atan2 [#20336](https://github.com/apache/datafusion/pull/20336) (kumarUjjawal) +- perf: Optimize concat()/concat_ws() UDFs [#20317](https://github.com/apache/datafusion/pull/20317) (neilconway) +- perf: Optimize translate() UDF for scalar inputs [#20305](https://github.com/apache/datafusion/pull/20305) (neilconway) +- perf: Optimize `array_has()` for scalar needle [#20374](https://github.com/apache/datafusion/pull/20374) (neilconway) +- perf: Optimize lpad, rpad for ASCII strings [#20278](https://github.com/apache/datafusion/pull/20278) (neilconway) +- perf: Optimize trim UDFs for single-character trims [#20328](https://github.com/apache/datafusion/pull/20328) (neilconway) +- perf: Optimize scalar fast path for `regexp_like` and rejects g inside combined flags like ig [#20354](https://github.com/apache/datafusion/pull/20354) (kumarUjjawal) +- perf: Use zero-copy slice instead of take kernel in sort merge join [#20463](https://github.com/apache/datafusion/pull/20463) (andygrove) +- perf: Optimize `initcap()` [#20352](https://github.com/apache/datafusion/pull/20352) (neilconway) +- perf: Fix quadratic behavior of `to_array_of_size` [#20459](https://github.com/apache/datafusion/pull/20459) (neilconway) +- perf: Optimize `array_has_any()` with scalar arg [#20385](https://github.com/apache/datafusion/pull/20385) (neilconway) +- perf: Use Hashbrown for array_distinct 
[#20538](https://github.com/apache/datafusion/pull/20538) (neilconway) +- perf: Cache num_output_rows in sort merge join to avoid O(n) recount [#20478](https://github.com/apache/datafusion/pull/20478) (andygrove) +- perf: Optimize heap handling in TopK operator [#20556](https://github.com/apache/datafusion/pull/20556) (AdamGS) +- perf: Optimize `array_position` for scalar needle [#20532](https://github.com/apache/datafusion/pull/20532) (neilconway) +- perf: Use Arrow vectorized eq kernel for IN list with column references [#20528](https://github.com/apache/datafusion/pull/20528) (zhangxffff) +- perf: Optimize `array_agg()` using `GroupsAccumulator` [#20504](https://github.com/apache/datafusion/pull/20504) (neilconway) +- perf: Optimize `array_to_string()`, support more types [#20553](https://github.com/apache/datafusion/pull/20553) (neilconway) + +**Implemented enhancements:** + +- feat: add list_files_cache table function for `datafusion-cli` [#19388](https://github.com/apache/datafusion/pull/19388) (jizezhang) +- feat: implement metrics for AsyncFuncExec [#19626](https://github.com/apache/datafusion/pull/19626) (feniljain) +- feat: split BatchPartitioner::try_new into hash and round-robin constructors [#19668](https://github.com/apache/datafusion/pull/19668) (mohit7705) +- feat: add Time type support to date_trunc function [#19640](https://github.com/apache/datafusion/pull/19640) (kumarUjjawal) +- feat: Allow log with non-integer base on decimals [#19372](https://github.com/apache/datafusion/pull/19372) (Yuvraj-cyborg) +- feat(spark): implement array_repeat function [#19702](https://github.com/apache/datafusion/pull/19702) (cht42) +- feat(spark): Implement collect_list/collect_set aggregate functions [#19699](https://github.com/apache/datafusion/pull/19699) (cht42) +- feat: implement Spark size function for arrays and maps [#19592](https://github.com/apache/datafusion/pull/19592) (CuteChuanChuan) +- feat: support Set Comparison Subquery 
[#19109](https://github.com/apache/datafusion/pull/19109) (waynexia) +- feat(spark): implement array slice function [#19811](https://github.com/apache/datafusion/pull/19811) (cht42) +- feat(spark): implement substring function [#19805](https://github.com/apache/datafusion/pull/19805) (cht42) +- feat: Add support for 'isoyear' in date_part function [#19821](https://github.com/apache/datafusion/pull/19821) (cht42) +- feat: support `SELECT DISTINCT id FROM t ORDER BY id LIMIT n` query use GroupedTopKAggregateStream [#19653](https://github.com/apache/datafusion/pull/19653) (haohuaijin) +- feat(spark): add trunc, date_trunc and time_trunc functions [#19829](https://github.com/apache/datafusion/pull/19829) (cht42) +- feat(spark): implement Spark `date_diff` function [#19845](https://github.com/apache/datafusion/pull/19845) (cht42) +- feat(spark): implement add_months function [#19711](https://github.com/apache/datafusion/pull/19711) (cht42) +- feat: support pushdown alias on dynamic filter with `ProjectionExec` [#19404](https://github.com/apache/datafusion/pull/19404) (discord9) +- feat(spark): add `base64` and `unbase64` functions [#19968](https://github.com/apache/datafusion/pull/19968) (cht42) +- feat: Show the number of matched Parquet pages in `DataSourceExec` [#19977](https://github.com/apache/datafusion/pull/19977) (nuno-faria) +- feat(spark): Add `SessionStateBuilderSpark` to datafusion-spark [#19865](https://github.com/apache/datafusion/pull/19865) (cht42) +- feat(spark): implement `from/to_utc_timestamp` functions [#19880](https://github.com/apache/datafusion/pull/19880) (cht42) +- feat(spark): implement `StringView` for `SparkConcat` [#19984](https://github.com/apache/datafusion/pull/19984) (aryan-212) +- feat(spark): add unix date and timestamp functions [#19892](https://github.com/apache/datafusion/pull/19892) (cht42) +- feat: implement protobuf converter trait to allow control over serialization and deserialization processes 
[#19437](https://github.com/apache/datafusion/pull/19437) (timsaucer) +- feat: optimise copying in `left` for Utf8 and LargeUtf8 [#19980](https://github.com/apache/datafusion/pull/19980) (theirix) +- feat: support Spark-compatible abs math function part 2 - ANSI mode [#18828](https://github.com/apache/datafusion/pull/18828) (hsiang-c) +- feat: add AggregateMode::PartialReduce for tree-reduce aggregation [#20019](https://github.com/apache/datafusion/pull/20019) (njsmith) +- feat: add ExpressionPlacement enum for optimizer expression placement decisions [#20065](https://github.com/apache/datafusion/pull/20065) (adriangb) +- feat: support f16 in coercion logic [#18944](https://github.com/apache/datafusion/pull/18944) (Jefffrey) +- feat: unify left and right functions and benches [#20114](https://github.com/apache/datafusion/pull/20114) (theirix) +- feat(spark): Adds negative spark function [#20006](https://github.com/apache/datafusion/pull/20006) (SubhamSinghal) +- feat: support limited deletion [#20137](https://github.com/apache/datafusion/pull/20137) (askalt) +- feat: Pushdown filters through `UnionExec` nodes [#20145](https://github.com/apache/datafusion/pull/20145) (haohuaijin) +- feat: support Spark-compatible `string_to_map` function [#20120](https://github.com/apache/datafusion/pull/20120) (unknowntpo) +- feat: Add `partition_stats()` for `EmptyExec` [#20203](https://github.com/apache/datafusion/pull/20203) (jonathanc-n) +- feat: add ExtractLeafExpressions optimizer rule for get_field pushdown [#20117](https://github.com/apache/datafusion/pull/20117) (adriangb) +- feat: Push limit into hash join [#20228](https://github.com/apache/datafusion/pull/20228) (jonathanc-n) +- feat: Optimize hash util for `MapArray` [#20179](https://github.com/apache/datafusion/pull/20179) (jonathanc-n) +- feat: Implement Spark `bitmap_bit_position` function [#20275](https://github.com/apache/datafusion/pull/20275) (kazantsev-maksim) +- feat: support sqllogictest output coloring 
[#20368](https://github.com/apache/datafusion/pull/20368) (theirix) +- feat: support Spark-compatible `json_tuple` function [#20412](https://github.com/apache/datafusion/pull/20412) (CuteChuanChuan) +- feat: Implement Spark `bitmap_bucket_number` function [#20288](https://github.com/apache/datafusion/pull/20288) (kazantsev-maksim) +- feat: support `arrays_zip` function [#20440](https://github.com/apache/datafusion/pull/20440) (comphead) +- feat: Implement Spark `bin` function [#20479](https://github.com/apache/datafusion/pull/20479) (kazantsev-maksim) +- feat: support extension planner for `TableScan` [#20548](https://github.com/apache/datafusion/pull/20548) (linhr) + +**Fixed bugs:** + +- fix: Return Int for Date - Date instead of duration [#19563](https://github.com/apache/datafusion/pull/19563) (kumarUjjawal) +- fix: DynamicFilterPhysicalExpr violates Hash/Eq contract [#19659](https://github.com/apache/datafusion/pull/19659) (kumarUjjawal) +- fix: unnest struct field with an alias failed with internal error [#19698](https://github.com/apache/datafusion/pull/19698) (kumarUjjawal) +- fix(accumulators): preserve state in evaluate() for window frame queries [#19618](https://github.com/apache/datafusion/pull/19618) (GaneshPatil7517) +- fix: Don't treat quoted column names as placeholder variables in SQL [#19339](https://github.com/apache/datafusion/pull/19339) (pmallex) +- fix: enhance CTE resolution with identifier normalization [#19519](https://github.com/apache/datafusion/pull/19519) (kysshsy) +- feat: Add null-aware anti join support [#19635](https://github.com/apache/datafusion/pull/19635) (viirya) +- fix: expose `ListFilesEntry` [#19804](https://github.com/apache/datafusion/pull/19804) (lonless9) +- fix: trunc function with precision uses round instead of trunc semantics [#19794](https://github.com/apache/datafusion/pull/19794) (kumarUjjawal) +- fix: calculate total seconds from interval fields for `extract(epoch)` 
[#19807](https://github.com/apache/datafusion/pull/19807) (lemorage) +- fix: predicate cache stats calculation [#19561](https://github.com/apache/datafusion/pull/19561) (feniljain) +- fix: preserve state in DistinctMedianAccumulator::evaluate() for window frame queries [#19887](https://github.com/apache/datafusion/pull/19887) (kumarUjjawal) +- fix: null in array_agg with DISTINCT and IGNORE [#19736](https://github.com/apache/datafusion/pull/19736) (davidlghellin) +- fix: union should return error instead of panic when input schema's len different [#19922](https://github.com/apache/datafusion/pull/19922) (haohuaijin) +- fix: change token consumption to peek to test on EOF in parser [#19927](https://github.com/apache/datafusion/pull/19927) (askalt) +- fix: maintain inner list nullability for `array_sort` [#19948](https://github.com/apache/datafusion/pull/19948) (Jefffrey) +- fix: Make `generate_series` return an empty set with invalid ranges [#19999](https://github.com/apache/datafusion/pull/19999) (nuno-faria) +- fix: return correct length array for scalar null input to `calculate_binary_math` [#19861](https://github.com/apache/datafusion/pull/19861) (Jefffrey) +- fix: respect DataFrameWriteOptions::with_single_file_output for paths without extensions [#19931](https://github.com/apache/datafusion/pull/19931) (kumarUjjawal) +- fix: correct weight handling in approx_percentile_cont_with_weight [#19941](https://github.com/apache/datafusion/pull/19941) (sesteves) +- fix: The limit_pushdown physical optimization rule removes limits in some cases leading to incorrect results [#20048](https://github.com/apache/datafusion/pull/20048) (masonh22) +- Add duplicate name error reproducer [#20106](https://github.com/apache/datafusion/pull/20106) (gabotechs) +- fix: filter pushdown when merge filter [#20110](https://github.com/apache/datafusion/pull/20110) (haohuaijin) +- fix: Make `serialize_to_file` test cross platform [#20147](https://github.com/apache/datafusion/pull/20147) 
(nuno-faria) +- fix: regression of `dict_id` in physical plan proto [#20063](https://github.com/apache/datafusion/pull/20063) (kumarUjjawal) +- fix: panic in ListingTableFactory when session is not SessionState [#20139](https://github.com/apache/datafusion/pull/20139) (evangelisilva) +- fix: update comment on FilterPushdownPropagation [#20040](https://github.com/apache/datafusion/pull/20040) (niebayes) +- fix: datatype_is_logically_equal for dictionaries [#20153](https://github.com/apache/datafusion/pull/20153) (dd-annarose) +- fix: Avoid integer overflow in split_part() [#20198](https://github.com/apache/datafusion/pull/20198) (neilconway) +- fix: Fix panic in regexp_like() [#20200](https://github.com/apache/datafusion/pull/20200) (neilconway) +- fix: Handle NULL inputs correctly in find_in_set() [#20209](https://github.com/apache/datafusion/pull/20209) (neilconway) +- fix: Ensure columns are casted to the correct names with Unions [#20146](https://github.com/apache/datafusion/pull/20146) (nuno-faria) +- fix: Avoid assertion failure on divide-by-zero [#20216](https://github.com/apache/datafusion/pull/20216) (neilconway) +- fix: Throw coercion error for `LIKE` operations for nested types. 
[#20212](https://github.com/apache/datafusion/pull/20212) (jonathanc-n) +- fix: disable dynamic filter pushdown for non min/max aggregates [#20279](https://github.com/apache/datafusion/pull/20279) (notashes) +- fix: Avoid integer overflow in substr() [#20199](https://github.com/apache/datafusion/pull/20199) (neilconway) +- fix: Fix scalar broadcast for to_timestamp() [#20224](https://github.com/apache/datafusion/pull/20224) (neilconway) +- fix: Add integer check for bitwise coercion [#20241](https://github.com/apache/datafusion/pull/20241) (Acfboy) +- fix: percentile_cont interpolation causes NaN for f16 input [#20208](https://github.com/apache/datafusion/pull/20208) (kumarUjjawal) +- fix: validate inter-file ordering in eq_properties() [#20329](https://github.com/apache/datafusion/pull/20329) (adriangb) +- fix: update filter predicates for min/max aggregates only if bounds change [#20380](https://github.com/apache/datafusion/pull/20380) (notashes) +- fix: Handle Utf8View and LargeUtf8 separators in concat_ws [#20361](https://github.com/apache/datafusion/pull/20361) (neilconway) +- fix: HashJoin panic with dictionary-encoded columns in multi-key joins [#20441](https://github.com/apache/datafusion/pull/20441) (Tim-53) +- fix: handle out of range errors in DATE_BIN instead of panicking [#20221](https://github.com/apache/datafusion/pull/20221) (mishop-15) +- fix: prevent duplicate alias collision with user-provided \_\_datafusion_extracted names [#20432](https://github.com/apache/datafusion/pull/20432) (adriangb) +- fix: SortMergeJoin don't wait for all input before emitting [#20482](https://github.com/apache/datafusion/pull/20482) (rluvaton) +- fix: `cardinality()` of an empty array should be zero [#20533](https://github.com/apache/datafusion/pull/20533) (neilconway) +- fix: Unaccounted spill sort in row_hash [#20314](https://github.com/apache/datafusion/pull/20314) (EmilyMatt) +- fix: IS NULL panic with invalid function without input arguments 
[#20306](https://github.com/apache/datafusion/pull/20306) (Acfboy) +- fix: handle empty delimiter in split_part (closes #20503) [#20542](https://github.com/apache/datafusion/pull/20542) (gferrate) +- fix(substrait): Correctly parse field references in subqueries [#20439](https://github.com/apache/datafusion/pull/20439) (neilconway) +- fix: increase ROUND decimal precision to prevent overflow truncation [#19926](https://github.com/apache/datafusion/pull/19926) (kumarUjjawal) +- fix: Fix `array_to_string` with columnar third arg [#20536](https://github.com/apache/datafusion/pull/20536) (neilconway) +- fix: Fix and Refactor Spark `shuffle` function [#20484](https://github.com/apache/datafusion/pull/20484) (erenavsarogullari) + +**Documentation updates:** + +- perfect hash join [#19411](https://github.com/apache/datafusion/pull/19411) (UBarney) +- docs: Fix two small issues in introduction.md [#19712](https://github.com/apache/datafusion/pull/19712) (AdamGS) +- docs: Refine Communication documentation to highlight Discord [#19714](https://github.com/apache/datafusion/pull/19714) (alamb) +- chore(deps): bump maturin from 1.10.2 to 1.11.5 in /docs [#19740](https://github.com/apache/datafusion/pull/19740) (dependabot[bot]) +- chore: remove LZO Parquet compression [#19726](https://github.com/apache/datafusion/pull/19726) (kumarUjjawal) +- Update 52.0.0 release version number and changelog [#19767](https://github.com/apache/datafusion/pull/19767) (xudong963) +- Update the upgrading.md [#19769](https://github.com/apache/datafusion/pull/19769) (xudong963) +- chore: update copyright notice year [#19758](https://github.com/apache/datafusion/pull/19758) (Jefffrey) +- doc: Add an auto-generated dependency graph for internal crates [#19280](https://github.com/apache/datafusion/pull/19280) (2010YOUY01) +- Docs: Fix some links in docs [#19834](https://github.com/apache/datafusion/pull/19834) (alamb) +- Docs: add additional links to blog posts 
[#19833](https://github.com/apache/datafusion/pull/19833) (alamb) +- Ensure null inputs to array setop functions return null output [#19683](https://github.com/apache/datafusion/pull/19683) (Jefffrey) +- chore(deps): bump sphinx from 8.2.3 to 9.1.0 in /docs [#19647](https://github.com/apache/datafusion/pull/19647) (dependabot[bot]) +- Fix struct casts to align fields by name (prevent positional mis-casts) [#19674](https://github.com/apache/datafusion/pull/19674) (kosiew) +- chore(deps): bump setuptools from 80.9.0 to 80.10.1 in /docs [#19988](https://github.com/apache/datafusion/pull/19988) (dependabot[bot]) +- minor: Fix doc about `write_batch_size` [#19979](https://github.com/apache/datafusion/pull/19979) (nuno-faria) +- Fix broken links in the documentation [#19964](https://github.com/apache/datafusion/pull/19964) (alamb) +- minor: Add favicon [#20000](https://github.com/apache/datafusion/pull/20000) (nuno-faria) +- docs: Fix some broken / missing links in the DataFusion documentation [#19958](https://github.com/apache/datafusion/pull/19958) (alamb) +- chore(deps): bump setuptools from 80.10.1 to 80.10.2 in /docs [#20022](https://github.com/apache/datafusion/pull/20022) (dependabot[bot]) +- docs: Automatically update DataFusion version in docs [#20001](https://github.com/apache/datafusion/pull/20001) (nuno-faria) +- docs: update data_types.md to reflect current Arrow type mappings [#20072](https://github.com/apache/datafusion/pull/20072) (karuppuchamysuresh) +- Runs-on for `linux-build-lib` and `linux-test` (2X faster CI) [#20107](https://github.com/apache/datafusion/pull/20107) (blaginin) +- Disallow positional struct casting when field names don’t overlap [#19955](https://github.com/apache/datafusion/pull/19955) (kosiew) +- docs: fix docstring formatting [#20158](https://github.com/apache/datafusion/pull/20158) (Jefffrey) +- Break upgrade guides into separate pages [#20183](https://github.com/apache/datafusion/pull/20183) (mishop-15) +- Better document the 
relationship between `FileFormat::projection` / `FileFormat::filter` and `FileScanConfig::Statistics` [#20188](https://github.com/apache/datafusion/pull/20188) (alamb) +- Document the relationship between FileFormat::projection / FileFormat::filter and FileScanConfig::output_ordering [#20196](https://github.com/apache/datafusion/pull/20196) (alamb) +- More documentation on `FileSource::table_schema` and `FileSource::projection` [#20242](https://github.com/apache/datafusion/pull/20242) (alamb) +- chore(deps): bump setuptools from 80.10.2 to 82.0.0 in /docs [#20255](https://github.com/apache/datafusion/pull/20255) (dependabot[bot]) +- docs: fix typos and improve wording in README [#20301](https://github.com/apache/datafusion/pull/20301) (iampratap7997-dot) +- Reduce ExtractLeafExpressions optimizer overhead with fast pre-scan [#20341](https://github.com/apache/datafusion/pull/20341) (adriangb) +- chore(deps): bump maturin from 1.11.5 to 1.12.2 in /docs [#20400](https://github.com/apache/datafusion/pull/20400) (dependabot[bot]) +- Migrate Python usage to uv workspace [#20414](https://github.com/apache/datafusion/pull/20414) (adriangb) +- test: Extend Spark Array functions: `array_repeat`, `shuffle` and `slice` test coverage [#20420](https://github.com/apache/datafusion/pull/20420) (erenavsarogullari) +- Runs-on for more actions [#20274](https://github.com/apache/datafusion/pull/20274) (blaginin) +- docs: Document that adding new optimizer rules is expensive [#20348](https://github.com/apache/datafusion/pull/20348) (alamb) +- add redirect for old upgrading.html URL to fix broken changelog links [#20582](https://github.com/apache/datafusion/pull/20582) (mishop-15) +- Upgrade DataFusion to arrow-rs/parquet 58.0.0 / `object_store` 0.13.0 [#19728](https://github.com/apache/datafusion/pull/19728) (alamb) +- Document guidance on how to evaluate breaking API changes [#20584](https://github.com/apache/datafusion/pull/20584) (alamb) + +**Other:** + +- Add a protection to
release candidate branch 52 [#19660](https://github.com/apache/datafusion/pull/19660) (xudong963) +- Downgrade aws-smithy-runtime, update `rust_decimal`, ignore RUSTSEC-2026-0001 to get clean CI [#19657](https://github.com/apache/datafusion/pull/19657) (alamb) +- Update dependencies [#19667](https://github.com/apache/datafusion/pull/19667) (alamb) +- Refactor PartitionedFile: add ordering field and new_from_meta constructor [#19596](https://github.com/apache/datafusion/pull/19596) (adriangb) +- Remove coalesce batches rule and deprecate CoalesceBatchesExec [#19622](https://github.com/apache/datafusion/pull/19622) (feniljain) +- Perf: Optimize `substring_index` via single-byte fast path and direct indexing [#19590](https://github.com/apache/datafusion/pull/19590) (lyne7-sc) +- refactor: Use `Signature::coercible` for isnan/iszero [#19604](https://github.com/apache/datafusion/pull/19604) (kumarUjjawal) +- Parquet: Push down supported list predicates (array_has/any/all) during decoding [#19545](https://github.com/apache/datafusion/pull/19545) (kosiew) +- Remove dependency on `rust_decimal`, remove ignore of `RUSTSEC-2026-0001` [#19666](https://github.com/apache/datafusion/pull/19666) (alamb) +- Store example data directly inside the datafusion-examples (#19141) [#19319](https://github.com/apache/datafusion/pull/19319) (cj-zhukov) +- minor: More comments to `ParquetOpener::open()` [#19677](https://github.com/apache/datafusion/pull/19677) (2010YOUY01) +- Feat: Allow pow with negative & non-integer exponent on decimals [#19369](https://github.com/apache/datafusion/pull/19369) (Yuvraj-cyborg) +- chore(deps): bump taiki-e/install-action from 2.65.13 to 2.65.15 [#19676](https://github.com/apache/datafusion/pull/19676) (dependabot[bot]) +- Refactor cache APIs to support ordering information [#19597](https://github.com/apache/datafusion/pull/19597) (adriangb) +- Record sort order when writing Parquet with WITH ORDER [#19595](https://github.com/apache/datafusion/pull/19595) 
(adriangb) +- implement var distinct [#19706](https://github.com/apache/datafusion/pull/19706) (thinh2) +- Fix TopK aggregation for UTF-8/Utf8View group keys and add safe fallback for unsupported string aggregates [#19285](https://github.com/apache/datafusion/pull/19285) (kosiew) +- infer parquet file order from metadata and use it to optimize scans [#19433](https://github.com/apache/datafusion/pull/19433) (adriangb) +- Add support for additional numeric types in to_timestamp functions [#19663](https://github.com/apache/datafusion/pull/19663) (gokselk) +- Fix internal error "Physical input schema should be the same as the one converted from logical input schema." [#18412](https://github.com/apache/datafusion/pull/18412) (alamb) +- fix(functions-aggregate): drain CORR state vectors for streaming aggregation [#19669](https://github.com/apache/datafusion/pull/19669) (geoffreyclaude) +- chore: bump dependabot PR limit for cargo from 5 to 15 [#19730](https://github.com/apache/datafusion/pull/19730) (Jefffrey) +- chore(deps): bump taiki-e/install-action from 2.65.15 to 2.66.1 [#19741](https://github.com/apache/datafusion/pull/19741) (dependabot[bot]) +- chore(deps): bump sqllogictest from 0.28.4 to 0.29.0 [#19744](https://github.com/apache/datafusion/pull/19744) (dependabot[bot]) +- chore(deps): bump blake3 from 1.8.2 to 1.8.3 [#19746](https://github.com/apache/datafusion/pull/19746) (dependabot[bot]) +- chore(deps): bump libc from 0.2.179 to 0.2.180 [#19748](https://github.com/apache/datafusion/pull/19748) (dependabot[bot]) +- chore(deps): bump async-compression from 0.4.36 to 0.4.37 [#19742](https://github.com/apache/datafusion/pull/19742) (dependabot[bot]) +- chore(deps): bump indexmap from 2.12.1 to 2.13.0 [#19747](https://github.com/apache/datafusion/pull/19747) (dependabot[bot]) +- Improve comment for predicate_cache_inner_records [#19762](https://github.com/apache/datafusion/pull/19762) (xudong963) +- Fix dynamic filter is_used function 
[#19734](https://github.com/apache/datafusion/pull/19734) (LiaCastaneda) +- slt: Add test for REE arrays in group by [#19763](https://github.com/apache/datafusion/pull/19763) (brancz) +- Fix run_tpcds data dir [#19771](https://github.com/apache/datafusion/pull/19771) (gabotechs) +- chore(deps): bump taiki-e/install-action from 2.66.1 to 2.66.2 [#19778](https://github.com/apache/datafusion/pull/19778) (dependabot[bot]) +- Include .proto files in datafusion-proto distribution [#19490](https://github.com/apache/datafusion/pull/19490) (DarkWanderer) +- Simplify `expr = L1 AND expr != L2` to `expr = L1` when `L1 != L2` [#19731](https://github.com/apache/datafusion/pull/19731) (simonvandel) +- chore(deps): bump flate2 from 1.1.5 to 1.1.8 [#19780](https://github.com/apache/datafusion/pull/19780) (dependabot[bot]) +- Upgrade DataFusion to arrow-rs/parquet 57.2.0 [#19355](https://github.com/apache/datafusion/pull/19355) (alamb) +- Expose Spilling Progress Interface in DataFusion [#19708](https://github.com/apache/datafusion/pull/19708) (xudong963) +- dev: Add a script to auto fix all lint violations [#19560](https://github.com/apache/datafusion/pull/19560) (2010YOUY01) +- refactor: Optimize `required_columns` from `BTreeSet` to `Vec` in struct `PushdownChecker` [#19678](https://github.com/apache/datafusion/pull/19678) (kumarUjjawal) +- Revert Workaround for Empty FixedSizeBinary Values Buffer After arrow-rs Upgrade [#19801](https://github.com/apache/datafusion/pull/19801) (tobixdev) +- chore(deps): bump taiki-e/install-action from 2.66.2 to 2.66.3 [#19802](https://github.com/apache/datafusion/pull/19802) (dependabot[bot]) +- Add Reproducer for Issues with LEFT joins on Fixed Size Binary Columns [#19800](https://github.com/apache/datafusion/pull/19800) (tobixdev) +- Improvements to `list_files_cache` table function [#19703](https://github.com/apache/datafusion/pull/19703) (alamb) +- Issue 19781 : Internal error: Assertion failed: !self.finished: LimitedBatchCoalescer
[#19785](https://github.com/apache/datafusion/pull/19785) (bert-beyondloops) +- physical plan: add `reset_plan_states`, plan re-use benchmark [#19806](https://github.com/apache/datafusion/pull/19806) (askalt) +- chore(deps): bump actions/setup-node from 6.1.0 to 6.2.0 [#19825](https://github.com/apache/datafusion/pull/19825) (dependabot[bot]) +- Use correct setting for click bench queries in sql_planner benchmark [#19835](https://github.com/apache/datafusion/pull/19835) (alamb) +- chore(deps): bump taiki-e/install-action from 2.66.3 to 2.66.5 [#19824](https://github.com/apache/datafusion/pull/19824) (dependabot[bot]) +- chore: refactor scalarvalue/encoding using available upstream arrow-rs methods [#19797](https://github.com/apache/datafusion/pull/19797) (Jefffrey) +- Refactor Spark `date_add`/`date_sub`/`bitwise_not` to remove unnecessary scalar arg check [#19473](https://github.com/apache/datafusion/pull/19473) (Jefffrey) +- Add BatchAdapter to simplify using PhysicalExprAdapter / Projector to map RecordBatch between schemas [#19716](https://github.com/apache/datafusion/pull/19716) (adriangb) +- [Minor] Reuse indices buffer in RepartitionExec [#19775](https://github.com/apache/datafusion/pull/19775) (Dandandan) +- Fix(optimizer): Make `EnsureCooperative` optimizer idempotent under multiple runs [#19757](https://github.com/apache/datafusion/pull/19757) (danielhumanmod) +- Allow dropping qualified columns [#19549](https://github.com/apache/datafusion/pull/19549) (ntjohnson1) +- Doc: Add more blog links to doc comments [#19837](https://github.com/apache/datafusion/pull/19837) (alamb) +- datafusion/common: Add support for hashing ListView arrays [#19814](https://github.com/apache/datafusion/pull/19814) (brancz) +- Project sort expressions in StreamingTable [#19719](https://github.com/apache/datafusion/pull/19719) (timsaucer) +- Fix grouping set subset satisfaction [#19853](https://github.com/apache/datafusion/pull/19853) (freakyzoidberg) +- Spark date part
[#19823](https://github.com/apache/datafusion/pull/19823) (cht42) +- chore(deps): bump wasm-bindgen-test from 0.3.56 to 0.3.58 [#19898](https://github.com/apache/datafusion/pull/19898) (dependabot[bot]) +- chore(deps): bump tokio-postgres from 0.7.15 to 0.7.16 [#19899](https://github.com/apache/datafusion/pull/19899) (dependabot[bot]) +- chore(deps): bump postgres-types from 0.2.11 to 0.2.12 [#19902](https://github.com/apache/datafusion/pull/19902) (dependabot[bot]) +- chore(deps): bump insta from 1.46.0 to 1.46.1 [#19901](https://github.com/apache/datafusion/pull/19901) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.66.5 to 2.66.7 [#19883](https://github.com/apache/datafusion/pull/19883) (dependabot[bot]) +- Consolidate cte_quoted_reference.slt into cte.slt [#19862](https://github.com/apache/datafusion/pull/19862) (AnjaliChoudhary99) +- Disable failing `array_union` edge-case with nested null array [#19904](https://github.com/apache/datafusion/pull/19904) (Jefffrey) +- chore(deps): bump the proto group across 1 directory with 5 updates [#19745](https://github.com/apache/datafusion/pull/19745) (dependabot[bot]) +- test(wasmtest): enable compression feature for wasm build [#19860](https://github.com/apache/datafusion/pull/19860) (ChanTsune) +- Feat : added truncate table support [#19633](https://github.com/apache/datafusion/pull/19633) (Nachiket-Roy) +- Remove UDAF manual Debug impls and simplify signatures [#19727](https://github.com/apache/datafusion/pull/19727) (Jefffrey) +- chore(deps): bump thiserror from 2.0.17 to 2.0.18 [#19900](https://github.com/apache/datafusion/pull/19900) (dependabot[bot]) +- Include license and notice files in more crates [#19913](https://github.com/apache/datafusion/pull/19913) (ankane) +- chore(deps): bump actions/setup-python from 6.1.0 to 6.2.0 [#19935](https://github.com/apache/datafusion/pull/19935) (dependabot[bot]) +- Coerce expressions to udtf [#19915](https://github.com/apache/datafusion/pull/19915) 
(XiangpengHao) +- Fix trailing whitespace in CROSS JOIN logical plan formatting [#19936](https://github.com/apache/datafusion/pull/19936) (mkleen) +- chore(deps): bump chrono from 0.4.42 to 0.4.43 [#19897](https://github.com/apache/datafusion/pull/19897) (dependabot[bot]) +- Improve error message when string functions receive Binary types [#19819](https://github.com/apache/datafusion/pull/19819) (lemorage) +- Refactor ListArray hashing to consider only sliced values [#19500](https://github.com/apache/datafusion/pull/19500) (Jefffrey) +- feat(datafusion-spark): implement spark compatible `unhex` function [#19909](https://github.com/apache/datafusion/pull/19909) (lyne7-sc) +- Support API for "pre-image" for pruning predicate evaluation [#19722](https://github.com/apache/datafusion/pull/19722) (sdf-jkl) +- Support LargeUtf8 as partition column [#19942](https://github.com/apache/datafusion/pull/19942) (paleolimbot) +- chore(deps): bump actions/checkout from 6.0.1 to 6.0.2 [#19953](https://github.com/apache/datafusion/pull/19953) (dependabot[bot]) +- preserve FilterExec batch size during ser/de [#19960](https://github.com/apache/datafusion/pull/19960) (askalt) +- Add struct pushdown query benchmark and projection pushdown tests [#19962](https://github.com/apache/datafusion/pull/19962) (adriangb) +- Improve error messages with nicer formatting of Date and Time types [#19954](https://github.com/apache/datafusion/pull/19954) (emilk) +- export `SessionState::register_catalog_list(...)` [#19925](https://github.com/apache/datafusion/pull/19925) (askalt) +- Change GitHub actions dependabot schedule to weekly [#19981](https://github.com/apache/datafusion/pull/19981) (Jefffrey) +- chore(deps): bump taiki-e/install-action from 2.66.7 to 2.67.9 [#19987](https://github.com/apache/datafusion/pull/19987) (dependabot[bot]) +- chore(deps): bump quote from 1.0.43 to 1.0.44 [#19992](https://github.com/apache/datafusion/pull/19992) (dependabot[bot]) +- chore(deps): bump nix from 0.30.1 to 
0.31.1 [#19991](https://github.com/apache/datafusion/pull/19991) (dependabot[bot]) +- chore(deps): bump sysinfo from 0.37.2 to 0.38.0 [#19990](https://github.com/apache/datafusion/pull/19990) (dependabot[bot]) +- chore(deps): bump uuid from 1.19.0 to 1.20.0 [#19993](https://github.com/apache/datafusion/pull/19993) (dependabot[bot]) +- minor: pull `uuid` into workspace dependencies [#19997](https://github.com/apache/datafusion/pull/19997) (Jefffrey) +- Fix ClickBench EventDate handling by casting UInt16 days-since-epoch to DATE via `hits` view [#19881](https://github.com/apache/datafusion/pull/19881) (kosiew) +- refactor: extract pushdown test utilities to shared module [#20010](https://github.com/apache/datafusion/pull/20010) (adriangb) +- chore(deps): bump taiki-e/install-action from 2.67.9 to 2.67.13 [#20020](https://github.com/apache/datafusion/pull/20020) (dependabot[bot]) +- add more projection pushdown slt tests [#20015](https://github.com/apache/datafusion/pull/20015) (adriangb) +- minor: Move metric `page_index_rows_pruned` to verbose level in `EXPLAIN ANALYZE` [#20026](https://github.com/apache/datafusion/pull/20026) (2010YOUY01) +- Tweak `adapter serialization` example [#20035](https://github.com/apache/datafusion/pull/20035) (adriangb) +- Simplify wait_complete function [#19937](https://github.com/apache/datafusion/pull/19937) (LiaCastaneda) +- [main] Update version to `52.1.0` (#19878) [#20028](https://github.com/apache/datafusion/pull/20028) (alamb) +- Fix/parquet opener page index policy [#19890](https://github.com/apache/datafusion/pull/19890) (aviralgarg05) +- minor: add tests for coercible signature considering nulls/dicts/ree [#19459](https://github.com/apache/datafusion/pull/19459) (Jefffrey) +- Enforce `clippy::allow_attributes` globally across workspace [#19576](https://github.com/apache/datafusion/pull/19576) (Jefffrey) +- Fix constant value from stats [#20042](https://github.com/apache/datafusion/pull/20042) (gabotechs) +- Simplify Spark 
`sha2` implementation [#19475](https://github.com/apache/datafusion/pull/19475) (Jefffrey) +- Further refactoring of type coercion function code [#19603](https://github.com/apache/datafusion/pull/19603) (Jefffrey) +- replace private is_volatile_expression_tree with equivalent public is_volatile [#20056](https://github.com/apache/datafusion/pull/20056) (adriangb) +- Improve documentation for ScalarUDFImpl::preimage [#20008](https://github.com/apache/datafusion/pull/20008) (alamb) +- Use BooleanBufferBuilder rather than Vec in ArrowBytesViewMap [#20064](https://github.com/apache/datafusion/pull/20064) (etk18) +- chore: Add microbenchmark (compared to ExprOrExpr) [#20076](https://github.com/apache/datafusion/pull/20076) (CuteChuanChuan) +- Minor: update tests in limit_pushdown.rs to insta [#20066](https://github.com/apache/datafusion/pull/20066) (alamb) +- Reduce number of traversals per node in `PhysicalExprSimplifier` [#20082](https://github.com/apache/datafusion/pull/20082) (AdamGS) +- Automatically generate examples documentation adv (#19294) [#19750](https://github.com/apache/datafusion/pull/19750) (cj-zhukov) +- Implement preimage for floor function to enable predicate pushdown [#20059](https://github.com/apache/datafusion/pull/20059) (devanshu0987) +- Refactor `iszero()` and `isnan()` to accept all numeric types [#20093](https://github.com/apache/datafusion/pull/20093) (kumarUjjawal) +- Use return_field_from_args in information schema and date_trunc [#20079](https://github.com/apache/datafusion/pull/20079) (AndreaBozzo) +- Preserve PhysicalExpr graph in proto round trip using Arc pointers as unique identifiers [#20037](https://github.com/apache/datafusion/pull/20037) (adriangb) +- add ability to customize tokens in parser [#19978](https://github.com/apache/datafusion/pull/19978) (askalt) +- Adjust `case_when DivideByZeroProtection` benchmark so that "percentage of zeroes" corresponds to "number of times protection is needed" 
[#20105](https://github.com/apache/datafusion/pull/20105) (pepijnve) +- refactor: Rename `FileSource::try_reverse_output` to `FileSource::try_pushdown_sort` [#20043](https://github.com/apache/datafusion/pull/20043) (kumarUjjawal) +- Improve memory accounting for ArrowBytesViewMap [#20077](https://github.com/apache/datafusion/pull/20077) (vigneshsiva11) +- chore: reduce production noise by using `debug` macro [#19885](https://github.com/apache/datafusion/pull/19885) (Standing-Man) +- chore(deps): bump taiki-e/install-action from 2.67.13 to 2.67.18 [#20124](https://github.com/apache/datafusion/pull/20124) (dependabot[bot]) +- chore(deps): bump actions/setup-node from 4 to 6 [#20125](https://github.com/apache/datafusion/pull/20125) (dependabot[bot]) +- chore(deps): bump tonic from 0.14.2 to 0.14.3 [#20127](https://github.com/apache/datafusion/pull/20127) (dependabot[bot]) +- chore(deps): bump insta from 1.46.1 to 1.46.3 [#20129](https://github.com/apache/datafusion/pull/20129) (dependabot[bot]) +- chore(deps): bump flate2 from 1.1.8 to 1.1.9 [#20130](https://github.com/apache/datafusion/pull/20130) (dependabot[bot]) +- chore(deps): bump clap from 4.5.54 to 4.5.56 [#20131](https://github.com/apache/datafusion/pull/20131) (dependabot[bot]) +- Add BufferExec execution plan [#19760](https://github.com/apache/datafusion/pull/19760) (gabotechs) +- Optimize the evaluation of date_part() == when pushed down [#19733](https://github.com/apache/datafusion/pull/19733) (sdf-jkl) +- chore(deps): bump bytes from 1.11.0 to 1.11.1 [#20141](https://github.com/apache/datafusion/pull/20141) (dependabot[bot]) +- Make session state builder clonable [#20136](https://github.com/apache/datafusion/pull/20136) (askalt) +- chore: remove datatype check functions in favour of upstream versions [#20104](https://github.com/apache/datafusion/pull/20104) (Jefffrey) +- Add Decimal support for floor preimage [#20099](https://github.com/apache/datafusion/pull/20099) (devanshu0987) +- Add more struct 
pushdown tests and planning benchmark [#20143](https://github.com/apache/datafusion/pull/20143) (adriangb) +- Add RepartitionExec test to projection_pushdown.slt [#20156](https://github.com/apache/datafusion/pull/20156) (adriangb) +- chore: Fix typos in comments [#20157](https://github.com/apache/datafusion/pull/20157) (neilconway) +- Fix `array_repeat` handling of null count values [#20102](https://github.com/apache/datafusion/pull/20102) (lyne7-sc) +- Refactor schema rewriter: remove lifetimes, extract column/cast helpers, add mismatch coverage [#20166](https://github.com/apache/datafusion/pull/20166) (kosiew) +- chore(deps): bump time from 0.3.44 to 0.3.47 [#20172](https://github.com/apache/datafusion/pull/20172) (dependabot[bot]) +- chore(deps-dev): bump webpack from 5.94.0 to 5.105.0 in /datafusion/wasmtest/datafusion-wasm-app [#20178](https://github.com/apache/datafusion/pull/20178) (dependabot[bot]) +- Fix Arrow Spill Underrun [#20159](https://github.com/apache/datafusion/pull/20159) (cetra3) +- nom parser instead of ad-hoc in examples [#20122](https://github.com/apache/datafusion/pull/20122) (cj-zhukov) +- fix(datafusion-cli): solve row count bug adding `saturating_add` to prevent potential overflow [#20185](https://github.com/apache/datafusion/pull/20185) (dariocurr) +- Enable inlist support for preimage [#20051](https://github.com/apache/datafusion/pull/20051) (sdf-jkl) +- unify the prettier versions [#20167](https://github.com/apache/datafusion/pull/20167) (cj-zhukov) +- chore: Unbreak doctest CI [#20218](https://github.com/apache/datafusion/pull/20218) (neilconway) +- Minor: verify plan output and unique field names [#20220](https://github.com/apache/datafusion/pull/20220) (alamb) +- Add more tests to projection_pushdown.slt [#20236](https://github.com/apache/datafusion/pull/20236) (adriangb) +- Add Expr::Alias passthrough to Expr::placement() [#20237](https://github.com/apache/datafusion/pull/20237) (adriangb) +- Make PushDownFilter and
CommonSubexprEliminate aware of Expr::placement [#20239](https://github.com/apache/datafusion/pull/20239) (adriangb) +- Refactor example metadata parsing utilities(#20204) [#20233](https://github.com/apache/datafusion/pull/20233) (cj-zhukov) +- add module structure and unit tests for expression pushdown logical optimizer [#20238](https://github.com/apache/datafusion/pull/20238) (adriangb) +- repro and disable dyn filter for preserve file partitions [#20175](https://github.com/apache/datafusion/pull/20175) (gene-bordegaray) +- chore(deps): bump taiki-e/install-action from 2.67.18 to 2.67.27 [#20254](https://github.com/apache/datafusion/pull/20254) (dependabot[bot]) +- chore(deps): bump sysinfo from 0.38.0 to 0.38.1 [#20261](https://github.com/apache/datafusion/pull/20261) (dependabot[bot]) +- chore(deps): bump clap from 4.5.56 to 4.5.57 [#20265](https://github.com/apache/datafusion/pull/20265) (dependabot[bot]) +- chore(deps): bump tempfile from 3.24.0 to 3.25.0 [#20262](https://github.com/apache/datafusion/pull/20262) (dependabot[bot]) +- chore(deps): bump regex from 1.12.2 to 1.12.3 [#20260](https://github.com/apache/datafusion/pull/20260) (dependabot[bot]) +- chore(deps): bump criterion from 0.8.1 to 0.8.2 [#20258](https://github.com/apache/datafusion/pull/20258) (dependabot[bot]) +- chore(deps): bump regex-syntax from 0.8.8 to 0.8.9 [#20264](https://github.com/apache/datafusion/pull/20264) (dependabot[bot]) +- chore(deps): bump aws-config from 1.8.12 to 1.8.13 [#20263](https://github.com/apache/datafusion/pull/20263) (dependabot[bot]) +- chore(deps): bump async-compression from 0.4.37 to 0.4.39 [#20259](https://github.com/apache/datafusion/pull/20259) (dependabot[bot]) +- Support JSON arrays reader/parse for datafusion [#19924](https://github.com/apache/datafusion/pull/19924) (zhuqi-lucas) +- chore: Add confirmation before tarball is released [#20207](https://github.com/apache/datafusion/pull/20207) (milenkovicm) +- FilterExec should remap indices of parent 
dynamic filters [#20286](https://github.com/apache/datafusion/pull/20286) (jackkleeman) +- Clean up expression placement UDF usage in tests [#20272](https://github.com/apache/datafusion/pull/20272) (adriangb) +- chore(deps): bump the arrow-parquet group with 7 updates [#20256](https://github.com/apache/datafusion/pull/20256) (dependabot[bot]) +- Cleanup example metadata parsing utilities(#20251) [#20252](https://github.com/apache/datafusion/pull/20252) (cj-zhukov) +- Add `StructArray` and `RunArray` benchmark tests to `with_hashes` [#20182](https://github.com/apache/datafusion/pull/20182) (notashes) +- Add protoc support for ArrowScanExecNode (#20280) [#20284](https://github.com/apache/datafusion/pull/20284) (JoshElkind) +- Improve ExternalSorter ResourcesExhausted Error Message [#20226](https://github.com/apache/datafusion/pull/20226) (erenavsarogullari) +- Introduce ProjectionExprs::unproject_exprs/project_exprs and improve docs [#20193](https://github.com/apache/datafusion/pull/20193) (alamb) +- chore: Remove "extern crate criterion" in benches [#20299](https://github.com/apache/datafusion/pull/20299) (neilconway) +- Support pushing down empty projections into joins [#20191](https://github.com/apache/datafusion/pull/20191) (jackkleeman) +- chore: change width_bucket buckets parameter from i32 to i64 [#20330](https://github.com/apache/datafusion/pull/20330) (comphead) +- fix null handling for `nanvl` & implement fast path [#20205](https://github.com/apache/datafusion/pull/20205) (kumarUjjawal) +- unify the prettier version adv(#20024) [#20311](https://github.com/apache/datafusion/pull/20311) (cj-zhukov) +- chore: Make memchr a workspace dependency [#20345](https://github.com/apache/datafusion/pull/20345) (neilconway) +- feat(datafusion-cli): enhance CLI helper with default hint [#20310](https://github.com/apache/datafusion/pull/20310) (dariocurr) +- Adds support for ANSI mode in negative function [#20189](https://github.com/apache/datafusion/pull/20189) 
(SubhamSinghal) +- Support parent dynamic filters for more join types [#20192](https://github.com/apache/datafusion/pull/20192) (jackkleeman) +- Fix incorrect `SortExec` removal before `AggregateExec` (option 2) [#20247](https://github.com/apache/datafusion/pull/20247) (alamb) +- Fix `try_shrink` not freeing back to pool [#20382](https://github.com/apache/datafusion/pull/20382) (cetra3) +- chore(deps): bump sysinfo from 0.38.1 to 0.38.2 [#20411](https://github.com/apache/datafusion/pull/20411) (dependabot[bot]) +- chore(deps): bump indicatif from 0.18.3 to 0.18.4 [#20410](https://github.com/apache/datafusion/pull/20410) (dependabot[bot]) +- chore(deps): bump liblzma from 0.4.5 to 0.4.6 [#20409](https://github.com/apache/datafusion/pull/20409) (dependabot[bot]) +- chore(deps): bump aws-config from 1.8.13 to 1.8.14 [#20407](https://github.com/apache/datafusion/pull/20407) (dependabot[bot]) +- chore(deps): bump tonic from 0.14.3 to 0.14.4 [#20406](https://github.com/apache/datafusion/pull/20406) (dependabot[bot]) +- chore(deps): bump clap from 4.5.57 to 4.5.59 [#20404](https://github.com/apache/datafusion/pull/20404) (dependabot[bot]) +- chore(deps): bump sqllogictest from 0.29.0 to 0.29.1 [#20405](https://github.com/apache/datafusion/pull/20405) (dependabot[bot]) +- chore(deps): bump env_logger from 0.11.8 to 0.11.9 [#20402](https://github.com/apache/datafusion/pull/20402) (dependabot[bot]) +- chore(deps): bump actions/stale from 10.1.1 to 10.2.0 [#20397](https://github.com/apache/datafusion/pull/20397) (dependabot[bot]) +- chore(deps): bump uuid from 1.20.0 to 1.21.0 [#20401](https://github.com/apache/datafusion/pull/20401) (dependabot[bot]) +- [Minor] Update object_store to 0.12.5 [#20378](https://github.com/apache/datafusion/pull/20378) (Dandandan) +- chore(deps): bump syn from 2.0.114 to 2.0.116 [#20399](https://github.com/apache/datafusion/pull/20399) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.67.27 to 2.68.0 
[#20398](https://github.com/apache/datafusion/pull/20398) (dependabot[bot]) +- chore: Cleanup returning null arrays [#20423](https://github.com/apache/datafusion/pull/20423) (neilconway) +- chore: fix labeler for `datafusion-functions-nested` [#20442](https://github.com/apache/datafusion/pull/20442) (comphead) +- build: update Rust toolchain version from 1.92.0 to 1.93.0 in `rust-toolchain.toml` [#20309](https://github.com/apache/datafusion/pull/20309) (dariocurr) +- chore: Cleanup "!is_valid(i)" -> "is_null(i)" [#20453](https://github.com/apache/datafusion/pull/20453) (neilconway) +- refactor: Extract sort-merge join filter logic into separate module [#19614](https://github.com/apache/datafusion/pull/19614) (viirya) +- Implement FFI table provider factory [#20326](https://github.com/apache/datafusion/pull/20326) (davisp) +- bench: Add criterion benchmark for sort merge join [#20464](https://github.com/apache/datafusion/pull/20464) (andygrove) +- chore: group minor dependencies into single PR [#20457](https://github.com/apache/datafusion/pull/20457) (comphead) +- chore(deps): bump taiki-e/install-action from 2.68.0 to 2.68.6 [#20467](https://github.com/apache/datafusion/pull/20467) (dependabot[bot]) +- chore(deps): bump astral-sh/setup-uv from 6.1.0 to 7.3.0 [#20468](https://github.com/apache/datafusion/pull/20468) (dependabot[bot]) +- chore(deps): bump the all-other-cargo-deps group with 6 updates [#20470](https://github.com/apache/datafusion/pull/20470) (dependabot[bot]) +- chore(deps): bump testcontainers-modules from 0.14.0 to 0.15.0 [#20471](https://github.com/apache/datafusion/pull/20471) (dependabot[bot]) +- [Minor] Use buffer_unordered [#20462](https://github.com/apache/datafusion/pull/20462) (Dandandan) +- bench: Add IN list benchmarks for non-constant list expressions [#20444](https://github.com/apache/datafusion/pull/20444) (zhangxffff) +- feat(memory-tracking): implement arrow_buffer::MemoryPool for MemoryPool 
[#18928](https://github.com/apache/datafusion/pull/18928) (notfilippo) +- chore: Avoid build fails on MinIO rate limits [#20472](https://github.com/apache/datafusion/pull/20472) (comphead) +- chore: Add end-to-end benchmark for array_agg, code cleanup [#20496](https://github.com/apache/datafusion/pull/20496) (neilconway) +- Upgrade to sqlparser 0.61.0 [#20177](https://github.com/apache/datafusion/pull/20177) (alamb) +- Switch to the latest Mac OS [#20510](https://github.com/apache/datafusion/pull/20510) (blaginin) +- Fix name tracker [#19856](https://github.com/apache/datafusion/pull/19856) (xanderbailey) +- Runs-on for extended CI checks [#20511](https://github.com/apache/datafusion/pull/20511) (blaginin) +- chore(deps): bump strum from 0.27.2 to 0.28.0 [#20520](https://github.com/apache/datafusion/pull/20520) (dependabot[bot]) +- chore(deps): bump taiki-e/install-action from 2.68.6 to 2.68.8 [#20518](https://github.com/apache/datafusion/pull/20518) (dependabot[bot]) +- chore(deps): bump the all-other-cargo-deps group with 2 updates [#20519](https://github.com/apache/datafusion/pull/20519) (dependabot[bot]) +- Make `custom_file_casts` example schema nullable to allow null `id` values during casting [#20486](https://github.com/apache/datafusion/pull/20486) (kosiew) +- Add support for FFI config extensions [#19469](https://github.com/apache/datafusion/pull/19469) (timsaucer) +- chore: Cleanup code to use `repeat_n` in a few places [#20527](https://github.com/apache/datafusion/pull/20527) (neilconway) +- chore(deps): bump strum_macros from 0.27.2 to 0.28.0 [#20521](https://github.com/apache/datafusion/pull/20521) (dependabot[bot]) +- chore: Replace `matches!` on fieldless enums with `==` [#20525](https://github.com/apache/datafusion/pull/20525) (neilconway) +- Update comments on OptimizerRule about function name matching [#20346](https://github.com/apache/datafusion/pull/20346) (alamb) +- Fix incorrect regex pattern in regex_replace_posix_groups 
[#19827](https://github.com/apache/datafusion/pull/19827) (GaneshPatil7517) +- Improve `HashJoinExecBuilder` to save state from previous fields [#20276](https://github.com/apache/datafusion/pull/20276) (askalt) +- [Minor] Fix error messages for `shrink` and `try_shrink` [#20422](https://github.com/apache/datafusion/pull/20422) (hareshkh) +- Fix physical expr adapter to resolve physical fields by name, not column index [#20485](https://github.com/apache/datafusion/pull/20485) (kosiew) +- [fix] Add type coercion from NULL to Interval to make date_bin more postgres compatible [#20499](https://github.com/apache/datafusion/pull/20499) (LiaCastaneda) +- Clamp early aggregation emit to the sort boundary when using partial group ordering [#20446](https://github.com/apache/datafusion/pull/20446) (jackkleeman) +- Split `push_down_filter.slt` into standalone sqllogictest files to reduce long-tail runtime [#20566](https://github.com/apache/datafusion/pull/20566) (kosiew) +- Add deterministic per-file timing summary to sqllogictest runner [#20569](https://github.com/apache/datafusion/pull/20569) (kosiew) +- chore: Enable workspace lint for all workspace members [#20577](https://github.com/apache/datafusion/pull/20577) (neilconway) +- Fix serde of window lead/lag defaults [#20608](https://github.com/apache/datafusion/pull/20608) (avantgardnerio) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 73 dependabot[bot] + 35 Neil Conway + 31 Kumar Ujjawal + 27 Andrew Lamb + 26 Adrian Garcia Badaracco + 21 Jeffrey Vo + 13 cht42 + 10 Albert Skalt + 10 kosiew + 10 lyne + 8 Nuno Faria + 7 Sergey Zhukov + 7 xudong.w + 6 Daniël Heres + 5 Adam Gutglick + 5 Gabriel + 5 Oleks V + 4 Andy Grove + 4 Dmitrii Blaginin + 4 Huaijin + 4 Jack Kleeman + 4 Jonathan Chen + 4 Yongting You + 4 notashes + 4 theirix + 3 Eren Avsarogullari + 3 Kazantsev Maksim + 3 Kosta Tarasov + 3 Liang-Chi Hsieh + 3 Lía Adriana + 3 Tim Saucer + 3 Yu-Chuan Hung + 3 dario curreri + 3 feniljain + 3 mishop-15 + 2 Acfboy + 2 Alan Tang + 2 Devanshu + 2 Frederic Branczyk + 2 Ganesh Patil + 2 Miao + 2 Michael Kleen + 2 Pepijn Van Eeckhoudt + 2 Peter L + 2 Subham Singhal + 2 Tobias Schwarzinger + 2 UBarney + 2 Yuvraj + 2 Zhang Xiaofeng + 2 jizezhang + 2 niebayes + 1 Andrea Bozzo + 1 Andrew Kane + 1 Anjali Choudhary + 1 Anna-Rose Lescure + 1 Aryan Anand + 1 Aviral Garg + 1 Bert Vermeiren + 1 Brent Gardner + 1 ChanTsune + 1 David López + 1 Dewey Dunnington + 1 Divyansh Pratap Singh + 1 Eesh Sagar Singh + 1 Emil Ernerfeldt + 1 Emily Matheys + 1 Eric Chang + 1 Evangeli Silva + 1 Filippo + 1 Gabriel Ferraté + 1 Gene Bordegaray + 1 Geoffrey Claude + 1 Goksel Kabadayi + 1 Haresh Khanna + 1 Heran Lin + 1 Josh Elkind + 1 Marko Milenković + 1 Mason + 1 Mikhail Zabaluev + 1 Mohit rao + 1 Nathaniel J. Smith + 1 Nick + 1 Oleg V. Kozlyuk + 1 Paul J. Davis + 1 Pierre Lacave + 1 Qi Zhu + 1 Raz Luvaton + 1 Rosai + 1 Ruihang Xia + 1 Sergio Esteves + 1 Simon Vandel Sillesen + 1 Siyuan Huang + 1 Tim-53 + 1 Tushar Das + 1 Vignesh + 1 XL Liang + 1 Xander + 1 Xiangpeng Hao + 1 comphead + 1 danielhumanmod + 1 discord9 + 1 hsiang-c + 1 iamthinh + 1 karuppuchamysuresh + 1 pmallex +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. 
diff --git a/docs/source/download.md b/docs/source/download.md index 3be76a6acf7b4..ed8fc06440f0c 100644 --- a/docs/source/download.md +++ b/docs/source/download.md @@ -26,7 +26,7 @@ For example: ```toml [dependencies] -datafusion = "52.1.0" +datafusion = "53.0.0" ``` While DataFusion is distributed via [crates.io] as a convenience, the diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index e48f0a7c92276..11a1a8a2d6831 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -101,7 +101,7 @@ The following configuration settings are available: | datafusion.execution.parquet.dictionary_page_size_limit | 1048576 | (writing) Sets best effort maximum dictionary page size, in bytes | | datafusion.execution.parquet.statistics_enabled | page | (writing) Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | (writing) Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. | -| datafusion.execution.parquet.created_by | datafusion version 52.1.0 | (writing) Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 53.0.0 | (writing) Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | 64 | (writing) Sets column index truncate length | | datafusion.execution.parquet.statistics_truncate_length | 64 | (writing) Sets statistics truncate length. 
If NULL, uses default parquet writer setting | | datafusion.execution.parquet.data_page_row_count_limit | 20000 | (writing) Sets best effort maximum number of rows in data page | diff --git a/docs/source/user-guide/crate-configuration.md b/docs/source/user-guide/crate-configuration.md index 44b4d39839c5b..2acb2140efcbf 100644 --- a/docs/source/user-guide/crate-configuration.md +++ b/docs/source/user-guide/crate-configuration.md @@ -156,7 +156,7 @@ By default, Datafusion returns errors as a plain text message. You can enable mo such as backtraces by enabling the `backtrace` feature to your `Cargo.toml` file like this: ```toml -datafusion = { version = "52.1.0", features = ["backtrace"]} +datafusion = { version = "53.0.0", features = ["backtrace"]} ``` Set environment [variables](https://doc.rust-lang.org/std/backtrace/index.html#environment-variables) diff --git a/docs/source/user-guide/example-usage.md b/docs/source/user-guide/example-usage.md index 83ba530d2b3b6..fd755715eec91 100644 --- a/docs/source/user-guide/example-usage.md +++ b/docs/source/user-guide/example-usage.md @@ -29,7 +29,7 @@ Find latest available Datafusion version on [DataFusion's crates.io] page. Add the dependency to your `Cargo.toml` file: ```toml -datafusion = "52.1.0" +datafusion = "53.0.0" tokio = { version = "1.0", features = ["rt-multi-thread"] } ```