From 1248366b70316444f03f5a18273094baddc991c1 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 2 Apr 2026 16:36:19 -0600 Subject: [PATCH 1/2] fix: mark array_compact as Compatible and improve test coverage The CometArrayCompact serde was incorrectly marked as Incompatible(None). On Spark 3.x, tests pass without allowIncompatible, confirming it matches Spark behavior. On Spark 4.0, ArrayCompact is a RuntimeReplaceable that rewrites to ArrayFilter before reaching this serde. Also expands SQL file test coverage to include string, double, and nested array element types, and upgrades existing tests from spark_answer_only to query mode to assert native execution. --- .../scala/org/apache/comet/serde/arrays.scala | 2 +- .../expressions/array/array_compact.sql | 24 ++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala b/spark/src/main/scala/org/apache/comet/serde/arrays.scala index 47a6e91421..a41db5f280 100644 --- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala +++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala @@ -313,7 +313,7 @@ object CometArrayRepeat extends CometExpressionSerde[ArrayRepeat] { object CometArrayCompact extends CometExpressionSerde[Expression] { - override def getSupportLevel(expr: Expression): SupportLevel = Incompatible(None) + override def getSupportLevel(expr: Expression): SupportLevel = Compatible() override def convert( expr: Expression, diff --git a/spark/src/test/resources/sql-tests/expressions/array/array_compact.sql b/spark/src/test/resources/sql-tests/expressions/array/array_compact.sql index 9b834a4dbd..83cd730978 100644 --- a/spark/src/test/resources/sql-tests/expressions/array/array_compact.sql +++ b/spark/src/test/resources/sql-tests/expressions/array/array_compact.sql @@ -15,7 +15,6 @@ -- specific language governing permissions and limitations -- under the License. 
--- ConfigMatrix: parquet.enable.dictionary=false,true statement CREATE TABLE test_array_compact(arr array<int>) USING parquet @@ -23,9 +22,28 @@ CREATE TABLE test_array_compact(arr array<int>) USING parquet statement INSERT INTO test_array_compact VALUES (array(1, NULL, 2, NULL, 3)), (array()), (NULL), (array(NULL, NULL)), (array(1, 2, 3)) -query spark_answer_only +-- column argument +query SELECT array_compact(arr) FROM test_array_compact -- literal arguments -query spark_answer_only +query SELECT array_compact(array(1, NULL, 2, NULL, 3)) + +-- string element type +statement +CREATE TABLE test_array_compact_str(arr array<string>) USING parquet + +statement +INSERT INTO test_array_compact_str VALUES (array('a', NULL, 'b', NULL, 'c')), (array()), (NULL), (array(NULL, NULL)), (array('', NULL, '', NULL)) + +query +SELECT array_compact(arr) FROM test_array_compact_str + +-- double element type +query +SELECT array_compact(array(1.0, NULL, 2.0, NULL, 3.0)) + +-- nested array type (removes null arrays from outer, preserves null elements in inner) +query +SELECT array_compact(array(array(1, NULL, 3), NULL, array(NULL, 2, 3))) From 042a784b7961e0c17811bcb3052434afa232c8aa Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 2 Apr 2026 17:17:56 -0600 Subject: [PATCH 2/2] fix: handle KnownNotContainsNull in Spark 4.0 shim Spark 4.0 wraps ArrayCompact's replacement in KnownNotContainsNull, a TaggingExpression that marks containsNull=false in the schema but has no runtime effect. Pass through to the child expression so that array_compact runs natively on Spark 4.0 instead of falling back. 
--- .../spark-4.0/org/apache/comet/shims/CometExprShim.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala index 2c5cebd166..bf8406c428 100644 --- a/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala +++ b/spark/src/main/spark-4.0/org/apache/comet/shims/CometExprShim.scala @@ -113,6 +113,12 @@ trait CometExprShim extends CommonStringExprs { // val optExpr = scalarFunctionExprToProto("width_bucket", childExprs: _*) // optExprWithInfo(optExpr, wb, wb.children: _*) + // KnownNotContainsNull is a TaggingExpression added in Spark 4.0 that only + // changes schema metadata (containsNull = false). It has no runtime effect, + // so we pass through to the child expression. + case k: KnownNotContainsNull => + exprToProtoInternal(k.child, inputs, binding) + case _ => None } }