diff --git a/dev/diffs/3.4.3.diff b/dev/diffs/3.4.3.diff
index 44f7601e36..c8e20559aa 100644
--- a/dev/diffs/3.4.3.diff
+++ b/dev/diffs/3.4.3.diff
@@ -2032,7 +2032,7 @@ index 07e2849ce6f..3e73645b638 100644
       ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
     )
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
-index 104b4e416cd..b8af360fa14 100644
+index 104b4e416cd..f05bfdecb69 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 @@ -38,6 +38,7 @@ import org.apache.parquet.schema.MessageType
@@ -2062,7 +2062,7 @@ index 104b4e416cd..b8af360fa14 100644
 -  test("Filters should be pushed down for vectorized Parquet reader at row group level") {
 +  test("Filters should be pushed down for vectorized Parquet reader at row group level",
-+    IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
++    IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
     import testImplicits._
     withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
@@ -2098,7 +2098,7 @@ index 104b4e416cd..b8af360fa14 100644
 -  test("filter pushdown - StringPredicate") {
 +  test("filter pushdown - StringPredicate",
-+    IgnoreCometNativeDataFusion("cannot be pushed down")) {
++    IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
     import testImplicits._
     // keep() should take effect on StartsWith/EndsWith/Contains
     Seq(
@@ -2153,7 +2153,7 @@ index 104b4e416cd..b8af360fa14 100644
 -  test("SPARK-34562: Bloom filter push down") {
 +  test("SPARK-34562: Bloom filter push down",
-+    IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
++    IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
     withTempPath { dir =>
       val path = dir.getCanonicalPath
       spark.range(100).selectExpr("id * 2 AS id")
diff --git a/dev/diffs/3.5.8.diff b/dev/diffs/3.5.8.diff
index c879ac8f2d..357c1f84d9 100644
--- a/dev/diffs/3.5.8.diff
+++ b/dev/diffs/3.5.8.diff
@@ -1982,7 +1982,7 @@ index 07e2849ce6f..3e73645b638 100644
       ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString
     )
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
-index 8e88049f51e..f9d515edee1 100644
+index 8e88049f51e..e3b20fe8845 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 @@ -1095,7 +1095,11 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
@@ -2004,7 +2004,7 @@ index 8e88049f51e..f9d515edee1 100644
 -  test("Filters should be pushed down for vectorized Parquet reader at row group level") {
 +  test("Filters should be pushed down for vectorized Parquet reader at row group level",
-+    IgnoreCometNativeScan("Native scans do not support the tested accumulator")) {
++    IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) {
     import testImplicits._
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", @@ -2034,16 +2034,17 @@ index 8e88049f51e..f9d515edee1 100644 } } } -@@ -1699,7 +1712,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -1699,7 +1712,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared (attr, value) => sources.StringContains(attr, value)) } - test("filter pushdown - StringPredicate") { -+ test("filter pushdown - StringPredicate", IgnoreCometNativeScan("cannot be pushed down")) { ++ test("filter pushdown - StringPredicate", ++ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) { import testImplicits._ // keep() should take effect on StartsWith/EndsWith/Contains Seq( -@@ -1743,7 +1756,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -1743,7 +1757,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } @@ -2053,9 +2054,9 @@ index 8e88049f51e..f9d515edee1 100644 val schema = StructType(Seq( StructField("a", IntegerType, nullable = false) )) -@@ -1949,11 +1965,24 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -1949,11 +1964,24 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared """.stripMargin) - + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - val e = intercept[SparkException] { + // Spark native readers wrap the error in SparkException(FAILED_READ_FILE). @@ -2081,7 +2082,7 @@ index 8e88049f51e..f9d515edee1 100644 } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { -@@ -1984,7 +2013,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -1984,7 +2012,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } @@ -2091,17 +2092,17 @@ index 8e88049f51e..f9d515edee1 100644 // block 1: // null count min max // page-0 0 0 99 -@@ -2044,7 +2074,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -2044,7 +2073,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } - test("SPARK-34562: Bloom filter push down") { + test("SPARK-34562: Bloom filter push down", -+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) { ++ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) { withTempPath { dir => val path = dir.getCanonicalPath spark.range(100).selectExpr("id * 2 AS id") -@@ -2276,7 +2307,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite { +@@ -2276,7 +2306,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite { assert(pushedParquetFilters.exists(_.getClass === filterClass), s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.") @@ -2114,7 +2115,7 @@ index 8e88049f51e..f9d515edee1 100644 } else { assert(selectedFilters.isEmpty, "There is filter pushed down") } -@@ -2336,7 +2371,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite { +@@ -2336,7 +2370,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite { assert(pushedParquetFilters.exists(_.getClass === filterClass), s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.") diff --git a/dev/diffs/4.0.1.diff b/dev/diffs/4.0.1.diff index 4775c3a2f6..3932666045 100644 --- a/dev/diffs/4.0.1.diff +++ b/dev/diffs/4.0.1.diff @@ -246,12 +246,12 @@ index aa3d02dc2fb..c4f878d9908 100644 WITH t(c1) AS (SELECT replace(listagg(DISTINCT col1 COLLATE 
unicode_rtrim) COLLATE utf8_binary, ' ', '') FROM (VALUES ('xbc '), ('xbc '), ('a'), ('xbc'))) SELECT len(c1), regexp_count(c1, 'a'), regexp_count(c1, 'xbc') FROM t; WITH t(c1) AS (SELECT listagg(col1) WITHIN GROUP (ORDER BY col1 COLLATE unicode_rtrim) FROM (VALUES ('abc '), ('abc\n'), ('abc'), ('x'))) SELECT replace(replace(c1, ' ', ''), '\n', '$') FROM t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql -index 0000000..0000000 100644 +index 41fd4de2a09..162d5a817b6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part3.sql @@ -6,6 +6,10 @@ -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/aggregates.sql#L352-L605 - + -- Test aggregate operator with codegen on and off. + +-- Floating-point precision difference between DataFusion and JVM for FILTER aggregates @@ -2765,7 +2765,7 @@ index cd6f41b4ef4..4b6a17344bc 100644 ParquetOutputFormat.WRITER_VERSION -> ParquetProperties.WriterVersion.PARQUET_2_0.toString ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala -index 6080a5e8e4b..ea058d57b4b 100644 +index 6080a5e8e4b..0d394024e85 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -38,6 +38,7 @@ import org.apache.parquet.schema.MessageType @@ -2795,7 +2795,7 @@ index 6080a5e8e4b..ea058d57b4b 100644 - test("Filters should be pushed down for vectorized Parquet reader at row group level") { + test("Filters should be pushed down for vectorized Parquet reader at row group level", -+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) { ++ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) { import testImplicits._ withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", @@ -2825,16 +2825,17 @@ index 6080a5e8e4b..ea058d57b4b 100644 } } } -@@ -1706,7 +1720,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -1706,7 +1720,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared (attr, value) => sources.StringContains(attr, value)) } - test("filter pushdown - StringPredicate") { -+ test("filter pushdown - StringPredicate", IgnoreCometNativeScan("cannot be pushed down")) { ++ test("filter pushdown - StringPredicate", ++ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) { import testImplicits._ // keep() should take effect on StartsWith/EndsWith/Contains Seq( -@@ -1750,7 +1764,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -1750,7 +1765,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } @@ -2844,7 +2845,7 @@ index 6080a5e8e4b..ea058d57b4b 100644 val schema = StructType(Seq( StructField("a", IntegerType, nullable = false) )) -@@ -1956,13 +1971,21 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -1956,13 +1972,21 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared """.stripMargin) 
withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { @@ -2871,7 +2872,7 @@ index 6080a5e8e4b..ea058d57b4b 100644 } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { -@@ -1993,7 +2016,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -1993,7 +2017,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } @@ -2881,17 +2882,17 @@ index 6080a5e8e4b..ea058d57b4b 100644 // block 1: // null count min max // page-0 0 0 99 -@@ -2053,7 +2077,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared +@@ -2053,7 +2078,8 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } - test("SPARK-34562: Bloom filter push down") { + test("SPARK-34562: Bloom filter push down", -+ IgnoreCometNativeScan("Native scans do not support the tested accumulator")) { ++ IgnoreCometNativeScan("https://github.com/apache/datafusion-comet/issues/3867")) { withTempPath { dir => val path = dir.getCanonicalPath spark.range(100).selectExpr("id * 2 AS id") -@@ -2305,7 +2330,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite { +@@ -2305,7 +2331,11 @@ class ParquetV1FilterSuite extends ParquetFilterSuite { assert(pushedParquetFilters.exists(_.getClass === filterClass), s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.") @@ -2904,7 +2905,7 @@ index 6080a5e8e4b..ea058d57b4b 100644 } else { assert(selectedFilters.isEmpty, "There is filter pushed down") } -@@ -2368,7 +2397,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite { +@@ -2368,7 +2398,11 @@ class ParquetV2FilterSuite extends ParquetFilterSuite { assert(pushedParquetFilters.exists(_.getClass === filterClass), s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.")