From e3d5feba6c0277eb786e5fe2f1308a2c1b18f281 Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Tue, 31 Mar 2026 01:12:36 -0700 Subject: [PATCH 1/7] fix: disable atan2 instead of tan --- .../source/user-guide/latest/compatibility.md | 2 +- docs/source/user-guide/latest/expressions.md | 84 +++++++++---------- .../apache/comet/serde/QueryPlanSerde.scala | 2 +- .../scala/org/apache/comet/serde/math.scala | 24 ++---- .../apache/comet/CometExpressionSuite.scala | 27 ++++-- 5 files changed, 72 insertions(+), 67 deletions(-) diff --git a/docs/source/user-guide/latest/compatibility.md b/docs/source/user-guide/latest/compatibility.md index 3ec6656187..c7b7bfb00f 100644 --- a/docs/source/user-guide/latest/compatibility.md +++ b/docs/source/user-guide/latest/compatibility.md @@ -88,7 +88,7 @@ the [Comet Supported Expressions Guide](expressions.md) for more information on - **Ceil, Floor**: Incorrect results for Decimal type inputs. [#1729](https://github.com/apache/datafusion-comet/issues/1729) -- **Tan**: `tan(-0.0)` produces `0.0` instead of `-0.0`. +- **Atan2**: `atan2(-0.0, -0.0)` produces `-3.141592653589793` instead of `0.0`. [#1897](https://github.com/apache/datafusion-comet/issues/1897) ### Aggregate Expressions diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index c3aca6f67a..740bfa7edb 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -121,48 +121,48 @@ Expressions that are not Spark-compatible will fall back to Spark by default and ## Math Expressions -| Expression | SQL | Spark-Compatible? | Compatibility Notes | -| -------------- | --------- | ----------------- | ----------------------------------------------------------------------------------------------------------- | -| Abs | `abs` | Yes | | -| Acos | `acos` | Yes | | -| Add | `+` | Yes | | -| Asin | `asin` | Yes | | -| Atan | `atan` | Yes | | -| Atan2 | `atan2` | Yes | | -| BRound | `bround` | Yes | | -| Ceil | `ceil` | No | Incorrect results for Decimal type inputs ([#1729](https://github.com/apache/datafusion-comet/issues/1729)) | -| Cos | `cos` | Yes | | -| Cosh | `cosh` | Yes | | -| Cot | `cot` | Yes | | -| Divide | `/` | Yes | | -| Exp | `exp` | Yes | | -| Expm1 | `expm1` | Yes | | -| Floor | `floor` | No | Incorrect results for Decimal type inputs ([#1729](https://github.com/apache/datafusion-comet/issues/1729)) | -| Hex | `hex` | Yes | | -| IntegralDivide | `div` | Yes | | -| IsNaN | `isnan` | Yes | | -| Log | `log` | Yes | | -| Log2 | `log2` | Yes | | -| Log10 | `log10` | Yes | | -| Multiply | `*` | Yes | | -| Pow | `power` | Yes | | -| Rand | `rand` | Yes | | -| Randn | `randn` | Yes | | -| Remainder | `%` | Yes | | -| Round | `round` | Yes | | -| Signum | `signum` | Yes | | -| Sin | `sin` | Yes | | -| Sinh | `sinh` | Yes | | -| Sqrt | `sqrt` | Yes | | -| Subtract | `-` | Yes | | -| Tan | `tan` | No | tan(-0.0) produces incorrect result ([#1897](https://github.com/apache/datafusion-comet/issues/1897)) | -| Tanh | `tanh` | Yes | | -| TryAdd | `try_add` | Yes | Only integer inputs are supported | -| TryDivide | `try_div` | Yes | Only integer inputs are supported | -| TryMultiply | `try_mul` | Yes | Only integer inputs are supported | -| TrySubtract | `try_sub` | Yes | Only integer inputs are supported | -| UnaryMinus | `-` | Yes | | -| Unhex | `unhex` | Yes | | +| Expression | SQL | Spark-Compatible? | Compatibility Notes | +| -------------- | --------- | ----------------- |------------------------------------------------------------------------------------------------------------------| +| Abs | `abs` | Yes | | +| Acos | `acos` | Yes | | +| Add | `+` | Yes | | +| Asin | `asin` | Yes | | +| Atan | `atan` | Yes | | +| Atan2 | `atan2` | No | atan2(-0.0, -0.0) produces incompatible result ([#1897](https://github.com/apache/datafusion-comet/issues/1897)) | +| BRound | `bround` | Yes | | +| Ceil | `ceil` | No | Incorrect results for Decimal type inputs ([#1729](https://github.com/apache/datafusion-comet/issues/1729)) | +| Cos | `cos` | Yes | | +| Cosh | `cosh` | Yes | | +| Cot | `cot` | Yes | | +| Divide | `/` | Yes | | +| Exp | `exp` | Yes | | +| Expm1 | `expm1` | Yes | | +| Floor | `floor` | No | Incorrect results for Decimal type inputs ([#1729](https://github.com/apache/datafusion-comet/issues/1729)) | +| Hex | `hex` | Yes | | +| IntegralDivide | `div` | Yes | | +| IsNaN | `isnan` | Yes | | +| Log | `log` | Yes | | +| Log2 | `log2` | Yes | | +| Log10 | `log10` | Yes | | +| Multiply | `*` | Yes | | +| Pow | `power` | Yes | | +| Rand | `rand` | Yes | | +| Randn | `randn` | Yes | | +| Remainder | `%` | Yes | | +| Round | `round` | Yes | | +| Signum | `signum` | Yes | | +| Sin | `sin` | Yes | | +| Sinh | `sinh` | Yes | | +| Sqrt | `sqrt` | Yes | | +| Subtract | `-` | Yes | | +| Tan | `tan` | Yes | | +| Tanh | `tanh` | Yes | | +| TryAdd | `try_add` | Yes | Only integer inputs are supported | +| TryDivide | `try_div` | Yes | Only integer inputs are supported | +| TryMultiply | `try_mul` | Yes | Only integer inputs are supported | +| TrySubtract | `try_sub` | Yes | Only integer inputs are supported | +| UnaryMinus | `-` | Yes | | +| Unhex | `unhex` | Yes | | ## Hashing Functions diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala index 02a76f69f0..c8a3e473a9 100644 --- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala +++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala @@ -116,7 +116,7 @@ object QueryPlanSerde extends Logging with CometExprShim { classOf[Sinh] -> CometScalarFunction("sinh"), classOf[Sqrt] -> CometScalarFunction("sqrt"), classOf[Subtract] -> CometSubtract, - classOf[Tan] -> CometTan, + classOf[Tan] -> CometScalarFunction("tan"), classOf[Tanh] -> CometScalarFunction("tanh"), classOf[Cot] -> CometScalarFunction("cot"), classOf[UnaryMinus] -> CometUnaryMinus, diff --git a/spark/src/main/scala/org/apache/comet/serde/math.scala b/spark/src/main/scala/org/apache/comet/serde/math.scala index 5a0393142a..a0f6e22a6c 100644 --- a/spark/src/main/scala/org/apache/comet/serde/math.scala +++ b/spark/src/main/scala/org/apache/comet/serde/math.scala @@ -26,6 +26,12 @@ import org.apache.comet.CometSparkSessionExtensions.withInfo import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType} object CometAtan2 extends CometExpressionSerde[Atan2] { + override def getSupportLevel(expr: Atan2): SupportLevel = + Incompatible( + Some( + "atan2(-0.0, -0.0) produces incompatible result" + + " (https://github.com/apache/datafusion-comet/issues/1897)")) + override def convert( expr: Atan2, inputs: Seq[Attribute], @@ -198,24 +204,6 @@ object CometAbs extends CometExpressionSerde[Abs] with MathExprBase { } } -object CometTan extends CometExpressionSerde[Tan] { - - override def getSupportLevel(expr: Tan): SupportLevel = - Incompatible( - Some( - "tan(-0.0) produces incorrect result" + - " (https://github.com/apache/datafusion-comet/issues/1897)")) - - override def convert( - expr: Tan, - inputs: Seq[Attribute], - binding: Boolean): Option[ExprOuterClass.Expr] = { - val childExpr = expr.children.map(exprToProtoInternal(_, inputs, binding)) - val optExpr = scalarFunctionExprToProto("tan", childExpr: _*) - optExprWithInfo(optExpr, expr, expr.children: _*) - } -} - sealed trait MathExprBase { protected def nullIfNegative(expression: Expression): Expression = { val zero = Literal.default(expression.dataType) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 9fdd5a6777..63050dee6b 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.Tag import org.apache.hadoop.fs.Path import org.apache.spark.sql.{CometTestBase, DataFrame, Row} -import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Ceil, Floor, FromUnixTime, Literal, StructsToJson, Tan, TruncDate, TruncTimestamp} +import org.apache.spark.sql.catalyst.expressions.{Alias, Atan2, Cast, Ceil, Floor, FromUnixTime, Literal, StructsToJson, TruncDate, TruncTimestamp} import org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps import org.apache.spark.sql.comet.CometProjectExec import org.apache.spark.sql.execution.{ProjectExec, SparkPlan} @@ -1333,8 +1333,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { private val doubleValues: Seq[Double] = Seq( -1.0, - // TODO we should eventually enable negative zero but there are known issues still - // -0.0, + -0.0, 0.0, +1.0, Double.MinValue, @@ -1346,7 +1345,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { test("various math scalar functions") { val data = doubleValues.map(n => (n, n)) - withSQLConf(CometConf.getExprAllowIncompatConfigKey(classOf[Tan]) -> "true") { + withSQLConf() { withParquetTable(data, "tbl") { // expressions with single arg for (expr <- Seq( @@ -1373,7 +1372,25 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { assert(cometProjectExecs.length == 1, expr) } // expressions with two args - for (expr <- Seq("atan2", "pow")) { + for (expr <- Seq("pow")) { + val (_, cometPlan) = + checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1, _2) FROM tbl")) + val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => + op + } + assert(cometProjectExecs.length == 1, expr) + } + } + } + } + + test("atan2 math scalar functions") { + // TODO we should eventually include negative zero but there are known issues still + val data = doubleValues.filter(n => java.lang.Double.compare(n, -0.0d) == 1).map(n => (n, n)) + withSQLConf(CometConf.getExprAllowIncompatConfigKey(classOf[Atan2]) -> "true") { + withParquetTable(data, "tbl") { + // expressions with two args + for (expr <- Seq("atan2")) { val (_, cometPlan) = checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1, _2) FROM tbl")) val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => From 43ab9df58548c5915d2fc71b4c36581d71216ddc Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Tue, 31 Mar 2026 01:25:11 -0700 Subject: [PATCH 2/7] doc --- docs/source/user-guide/latest/expressions.md | 2 +- .../src/test/scala/org/apache/comet/CometExpressionSuite.scala | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index 740bfa7edb..c5da72b14f 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -122,7 +122,7 @@ Expressions that are not Spark-compatible will fall back to Spark by default and ## Math Expressions | Expression | SQL | Spark-Compatible? | Compatibility Notes | -| -------------- | --------- | ----------------- |------------------------------------------------------------------------------------------------------------------| +| -------------- | --------- | ----------------- | ---------------------------------------------------------------------------------------------------------------- | | Abs | `abs` | Yes | | | Acos | `acos` | Yes | | | Add | `+` | Yes | | diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 63050dee6b..a8860f73c9 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -1386,6 +1386,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { test("atan2 math scalar functions") { // TODO we should eventually include negative zero but there are known issues still + // https://github.com/apache/datafusion-comet/issues/1897 val data = doubleValues.filter(n => java.lang.Double.compare(n, -0.0d) == 1).map(n => (n, n)) withSQLConf(CometConf.getExprAllowIncompatConfigKey(classOf[Atan2]) -> "true") { withParquetTable(data, "tbl") { From 1c0d56f7d7ff0ad6340306f04a1273ab32dcdd4e Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Tue, 31 Mar 2026 15:04:53 -0700 Subject: [PATCH 3/7] address review comments --- spark/src/main/scala/org/apache/comet/serde/math.scala | 2 +- spark/src/test/resources/sql-tests/expressions/math/atan2.sql | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/spark/src/main/scala/org/apache/comet/serde/math.scala b/spark/src/main/scala/org/apache/comet/serde/math.scala index a0f6e22a6c..ce87433fe9 100644 --- a/spark/src/main/scala/org/apache/comet/serde/math.scala +++ b/spark/src/main/scala/org/apache/comet/serde/math.scala @@ -19,7 +19,7 @@ package org.apache.comet.serde -import org.apache.spark.sql.catalyst.expressions.{Abs, Atan2, Attribute, Ceil, CheckOverflow, Expression, Floor, Hex, If, LessThanOrEqual, Literal, Log, Log10, Log2, Tan, Unhex} +import org.apache.spark.sql.catalyst.expressions.{Abs, Atan2, Attribute, Ceil, CheckOverflow, Expression, Floor, Hex, If, LessThanOrEqual, Literal, Log, Log10, Log2, Unhex} import org.apache.spark.sql.types.{DecimalType, NumericType} import org.apache.comet.CometSparkSessionExtensions.withInfo diff --git a/spark/src/test/resources/sql-tests/expressions/math/atan2.sql b/spark/src/test/resources/sql-tests/expressions/math/atan2.sql index 7a912930b8..6ef947e2f4 100644 --- a/spark/src/test/resources/sql-tests/expressions/math/atan2.sql +++ b/spark/src/test/resources/sql-tests/expressions/math/atan2.sql @@ -15,6 +15,7 @@ -- specific language governing permissions and limitations -- under the License. +-- Config: spark.comet.expression.Atan2.allowIncompatible=true -- ConfigMatrix: parquet.enable.dictionary=false,true statement From f5fa6169e7d36f861d05a1ea6470ab95a5cd1fbf Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Thu, 2 Apr 2026 13:24:51 -0700 Subject: [PATCH 4/7] address review comments --- .../sql-tests/expressions/math/tan.sql | 4 +- .../apache/comet/CometExpressionSuite.scala | 68 +++++++++---------- 2 files changed, 35 insertions(+), 37 deletions(-) diff --git a/spark/src/test/resources/sql-tests/expressions/math/tan.sql b/spark/src/test/resources/sql-tests/expressions/math/tan.sql index 9496844804..dd03ffb620 100644 --- a/spark/src/test/resources/sql-tests/expressions/math/tan.sql +++ b/spark/src/test/resources/sql-tests/expressions/math/tan.sql @@ -22,11 +22,11 @@ statement CREATE TABLE test_tan(d double) USING parquet statement -INSERT INTO test_tan VALUES (0.0), (0.7853981633974483), (-0.7853981633974483), (1.0), (NULL), (cast('NaN' as double)), (cast('Infinity' as double)) +INSERT INTO test_tan VALUES (0.0), (-0.0), (0.7853981633974483), (-0.7853981633974483), (1.0), (NULL), (cast('NaN' as double)), (cast('Infinity' as double)) query tolerance=1e-6 SELECT tan(d) FROM test_tan -- literal arguments query tolerance=1e-6 -SELECT tan(0.0), tan(0.7853981633974483), tan(NULL) +SELECT tan(0.0), tan(-0.0), tan(0.7853981633974483), tan(NULL) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 6d07c986d6..4a07de5ecf 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -1345,41 +1345,39 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { test("various math scalar functions") { val data = doubleValues.map(n => (n, n)) - withSQLConf() { - withParquetTable(data, "tbl") { - // expressions with single arg - for (expr <- Seq( - "acos", - "asin", - "atan", - "cos", - "cosh", - "exp", - "ln", - "log10", - "log2", - "sin", - "sinh", - "sqrt", - "tan", - "tanh", - "cot")) { - val (_, cometPlan) = - checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1), $expr(_2) FROM tbl")) - val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => - op - } - assert(cometProjectExecs.length == 1, expr) - } - // expressions with two args - for (expr <- Seq("pow")) { - val (_, cometPlan) = - checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1, _2) FROM tbl")) - val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => - op - } - assert(cometProjectExecs.length == 1, expr) - } + withParquetTable(data, "tbl") { + // expressions with single arg + for (expr <- Seq( + "acos", + "asin", + "atan", + "cos", + "cosh", + "exp", + "ln", + "log10", + "log2", + "sin", + "sinh", + "sqrt", + "tan", + "tanh", + "cot")) { + val (_, cometPlan) = + checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1), $expr(_2) FROM tbl")) + val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => + op + } + assert(cometProjectExecs.length == 1, expr) + } + // expressions with two args + for (expr <- Seq("pow")) { + val (_, cometPlan) = + checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1, _2) FROM tbl")) + val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => + op + } + assert(cometProjectExecs.length == 1, expr) } } } From a35db1e45486bdf45937eb333e2134e0fb877679 Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Fri, 3 Apr 2026 09:40:22 -0700 Subject: [PATCH 5/7] address review comments --- .../source/user-guide/latest/compatibility.md | 5 -- docs/source/user-guide/latest/expressions.md | 84 +++++++++---------- .../scala/org/apache/comet/serde/math.scala | 14 ++-- .../sql-tests/expressions/math/atan2.sql | 18 +++- .../apache/comet/CometExpressionSuite.scala | 28 ++----- 5 files changed, 66 insertions(+), 83 deletions(-) diff --git a/docs/source/user-guide/latest/compatibility.md b/docs/source/user-guide/latest/compatibility.md index 9670ba18b1..906fac0179 100644 --- a/docs/source/user-guide/latest/compatibility.md +++ b/docs/source/user-guide/latest/compatibility.md @@ -80,11 +80,6 @@ the [Comet Supported Expressions Guide](expressions.md) for more information on timezone is UTC. [#2649](https://github.com/apache/datafusion-comet/issues/2649) -### Math Expressions - -- **Atan2**: `atan2(-0.0, -0.0)` produces `-3.141592653589793` instead of `0.0`. - [#1897](https://github.com/apache/datafusion-comet/issues/1897) - ### Aggregate Expressions - **Corr**: Returns null instead of NaN in some edge cases. diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index 5aca034198..bef31b4095 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -121,48 +121,48 @@ Expressions that are not Spark-compatible will fall back to Spark by default and ## Math Expressions -| Expression | SQL | Spark-Compatible? | Compatibility Notes | -| -------------- | --------- | ----------------- | ---------------------------------------------------------------------------------------------------------------- | -| Abs | `abs` | Yes | | -| Acos | `acos` | Yes | | -| Add | `+` | Yes | | -| Asin | `asin` | Yes | | -| Atan | `atan` | Yes | | -| Atan2 | `atan2` | No | atan2(-0.0, -0.0) produces incompatible result ([#1897](https://github.com/apache/datafusion-comet/issues/1897)) | -| BRound | `bround` | Yes | | -| Ceil | `ceil` | Yes | | -| Cos | `cos` | Yes | | -| Cosh | `cosh` | Yes | | -| Cot | `cot` | Yes | | -| Divide | `/` | Yes | | -| Exp | `exp` | Yes | | -| Expm1 | `expm1` | Yes | | -| Floor | `floor` | Yes | | -| Hex | `hex` | Yes | | -| IntegralDivide | `div` | Yes | | -| IsNaN | `isnan` | Yes | | -| Log | `log` | Yes | | -| Log2 | `log2` | Yes | | -| Log10 | `log10` | Yes | | -| Multiply | `*` | Yes | | -| Pow | `power` | Yes | | -| Rand | `rand` | Yes | | -| Randn | `randn` | Yes | | -| Remainder | `%` | Yes | | -| Round | `round` | Yes | | -| Signum | `signum` | Yes | | -| Sin | `sin` | Yes | | -| Sinh | `sinh` | Yes | | -| Sqrt | `sqrt` | Yes | | -| Subtract | `-` | Yes | | -| Tan | `tan` | Yes | | -| Tanh | `tanh` | Yes | | -| TryAdd | `try_add` | Yes | Only integer inputs are supported | -| TryDivide | `try_div` | Yes | Only integer inputs are supported | -| TryMultiply | `try_mul` | Yes | Only integer inputs are supported | -| TrySubtract | `try_sub` | Yes | Only integer inputs are supported | -| UnaryMinus | `-` | Yes | | -| Unhex | `unhex` | Yes | | +| Expression | SQL | Spark-Compatible? | Compatibility Notes | +| -------------- | --------- | ----------------- | --------------------------------- | +| Abs | `abs` | Yes | | +| Acos | `acos` | Yes | | +| Add | `+` | Yes | | +| Asin | `asin` | Yes | | +| Atan | `atan` | Yes | | +| Atan2 | `atan2` | No | | +| BRound | `bround` | Yes | | +| Ceil | `ceil` | Yes | | +| Cos | `cos` | Yes | | +| Cosh | `cosh` | Yes | | +| Cot | `cot` | Yes | | +| Divide | `/` | Yes | | +| Exp | `exp` | Yes | | +| Expm1 | `expm1` | Yes | | +| Floor | `floor` | Yes | | +| Hex | `hex` | Yes | | +| IntegralDivide | `div` | Yes | | +| IsNaN | `isnan` | Yes | | +| Log | `log` | Yes | | +| Log2 | `log2` | Yes | | +| Log10 | `log10` | Yes | | +| Multiply | `*` | Yes | | +| Pow | `power` | Yes | | +| Rand | `rand` | Yes | | +| Randn | `randn` | Yes | | +| Remainder | `%` | Yes | | +| Round | `round` | Yes | | +| Signum | `signum` | Yes | | +| Sin | `sin` | Yes | | +| Sinh | `sinh` | Yes | | +| Sqrt | `sqrt` | Yes | | +| Subtract | `-` | Yes | | +| Tan | `tan` | Yes | | +| Tanh | `tanh` | Yes | | +| TryAdd | `try_add` | Yes | Only integer inputs are supported | +| TryDivide | `try_div` | Yes | Only integer inputs are supported | +| TryMultiply | `try_mul` | Yes | Only integer inputs are supported | +| TrySubtract | `try_sub` | Yes | Only integer inputs are supported | +| UnaryMinus | `-` | Yes | | +| Unhex | `unhex` | Yes | | ## Hashing Functions diff --git a/spark/src/main/scala/org/apache/comet/serde/math.scala b/spark/src/main/scala/org/apache/comet/serde/math.scala index 44ac446bec..a03fa07d82 100644 --- a/spark/src/main/scala/org/apache/comet/serde/math.scala +++ b/spark/src/main/scala/org/apache/comet/serde/math.scala @@ -19,25 +19,21 @@ package org.apache.comet.serde -import org.apache.spark.sql.catalyst.expressions.{Abs, Atan2, Attribute, Ceil, CheckOverflow, Expression, Floor, Hex, If, LessThanOrEqual, Literal, Log, Log10, Log2, Logarithm, Unhex} +import org.apache.spark.sql.catalyst.expressions.{Abs, Add, Atan2, Attribute, Ceil, CheckOverflow, Expression, Floor, Hex, If, LessThanOrEqual, Literal, Log, Log10, Log2, Logarithm, Unhex} import org.apache.spark.sql.types.{DecimalType, DoubleType, NumericType} import org.apache.comet.CometSparkSessionExtensions.withInfo import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal, optExprWithInfo, scalarFunctionExprToProto, scalarFunctionExprToProtoWithReturnType, serializeDataType} object CometAtan2 extends CometExpressionSerde[Atan2] { - override def getSupportLevel(expr: Atan2): SupportLevel = - Incompatible( - Some( - "atan2(-0.0, -0.0) produces incompatible result" + - " (https://github.com/apache/datafusion-comet/issues/1897)")) - override def convert( expr: Atan2, inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { - val leftExpr = exprToProtoInternal(expr.left, inputs, binding) - val rightExpr = exprToProtoInternal(expr.right, inputs, binding) + val left = Add(expr.left, Literal.default(expr.left.dataType)) + val right = Add(expr.right, Literal.default(expr.right.dataType)) + val leftExpr = exprToProtoInternal(left, inputs, binding) + val rightExpr = exprToProtoInternal(right, inputs, binding) val optExpr = scalarFunctionExprToProto("atan2", leftExpr, rightExpr) optExprWithInfo(optExpr, expr, expr.left, expr.right) } diff --git a/spark/src/test/resources/sql-tests/expressions/math/atan2.sql b/spark/src/test/resources/sql-tests/expressions/math/atan2.sql index 6ef947e2f4..47c7fe8ba5 100644 --- a/spark/src/test/resources/sql-tests/expressions/math/atan2.sql +++ b/spark/src/test/resources/sql-tests/expressions/math/atan2.sql @@ -15,14 +15,21 @@ -- specific language governing permissions and limitations -- under the License. --- Config: spark.comet.expression.Atan2.allowIncompatible=true -- ConfigMatrix: parquet.enable.dictionary=false,true statement CREATE TABLE test_atan2(y double, x double) USING parquet statement -INSERT INTO test_atan2 VALUES (0.0, 1.0), (1.0, 0.0), (1.0, 1.0), (-1.0, -1.0), (0.0, 0.0), (NULL, 1.0), (1.0, NULL), (cast('NaN' as double), 1.0), (cast('Infinity' as double), 1.0) +INSERT INTO test_atan2 VALUES + (0.0, 0.0), (0.0, -0.0), (0.0, 1.0), (0.0, -1.0), (0.0, NULL), (0.0, cast('NaN' as double)), (0.0, cast('Infinity' as double)), (0.0, cast('-Infinity' as double)), + (-0.0, 0.0), (-0.0, -0.0), (-0.0, 1.0), (-0.0, -1.0), (-0.0, NULL), (-0.0, cast('NaN' as double)), (-0.0, cast('Infinity' as double)), (-0.0, cast('-Infinity' as double)), + (1.0, 0.0), (1.0, -0.0), (1.0, 1.0), (1.0, -1.0), (1.0, NULL), (1.0, cast('NaN' as double)), (1.0, cast('Infinity' as double)), (1.0, cast('-Infinity' as double)), + (-1.0, 0.0), (-1.0, -0.0), (-1.0, 1.0), (-1.0, -1.0), (-1.0, NULL), (-1.0, cast('NaN' as double)), (-1.0, cast('Infinity' as double)), (-1.0, cast('-Infinity' as double)), + (NULL, 0.0), (NULL, -0.0), (NULL, 1.0), (NULL, -1.0), (NULL, NULL), (NULL, cast('NaN' as double)), (NULL, cast('Infinity' as double)), (NULL, cast('-Infinity' as double)), + (cast('NaN' as double), 0.0), (cast('NaN' as double), -0.0), (cast('NaN' as double), 1.0), (cast('NaN' as double), -1.0), (cast('NaN' as double), NULL), (cast('NaN' as double), cast('NaN' as double)), (cast('NaN' as double), cast('Infinity' as double)), (cast('NaN' as double), cast('-Infinity' as double)), + (cast('Infinity' as double), 0.0), (cast('Infinity' as double), -0.0), (cast('Infinity' as double), 1.0), (cast('Infinity' as double), -1.0), (cast('Infinity' as double), NULL), (cast('Infinity' as double), cast('NaN' as double)), (cast('Infinity' as double), cast('Infinity' as double)), (cast('Infinity' as double), cast('-Infinity' as double)), + (cast('-Infinity' as double), 0.0), (cast('-Infinity' as double), -0.0), (cast('-Infinity' as double), 1.0), (cast('-Infinity' as double), -1.0), (cast('-Infinity' as double), NULL), (cast('-Infinity' as double), cast('NaN' as double)), (cast('-Infinity' as double), cast('Infinity' as double)), (cast('-Infinity' as double), cast('-Infinity' as double)) query tolerance=1e-6 SELECT atan2(y, x) FROM test_atan2 @@ -35,6 +42,9 @@ SELECT atan2(y, 1.0) FROM test_atan2 query tolerance=1e-6 SELECT atan2(1.0, x) FROM test_atan2 --- literal + literal +-- literal permutations query tolerance=1e-6 -SELECT atan2(1.0, 1.0), atan2(0.0, 0.0), atan2(-1.0, -1.0), atan2(NULL, 1.0) +SELECT atan2(0.0, 0.0), atan2(0.0, -0.0), atan2(0.0, 1.0), atan2(0.0, -1.0), + atan2(-0.0, 0.0), atan2(-0.0, -0.0), atan2(-0.0, 1.0), atan2(-0.0, -1.0), + atan2(1.0, 0.0), atan2(1.0, -0.0), atan2(1.0, 1.0), atan2(1.0, -1.0), + atan2(-1.0, 0.0), atan2(-1.0, -0.0), atan2(-1.0, 1.0), atan2(-1.0, -1.0) diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala index 4a07de5ecf..78a19983d9 100644 --- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala +++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala @@ -28,7 +28,7 @@ import org.scalatest.Tag import org.apache.hadoop.fs.Path import org.apache.spark.sql.{CometTestBase, DataFrame, Row} -import org.apache.spark.sql.catalyst.expressions.{Alias, Atan2, Cast, FromUnixTime, Literal, StructsToJson, TruncDate, TruncTimestamp} +import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, FromUnixTime, Literal, StructsToJson, TruncDate, TruncTimestamp} import org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps import org.apache.spark.sql.comet.CometProjectExec import org.apache.spark.sql.execution.{ProjectExec, SparkPlan} @@ -1344,8 +1344,7 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { Double.NegativeInfinity) test("various math scalar functions") { - val data = doubleValues.map(n => (n, n)) - withParquetTable(data, "tbl") { + withParquetTable(doubleValues.map(n => (n, n)), "tbl") { // expressions with single arg for (expr <- Seq( "acos", @@ -1370,8 +1369,10 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } assert(cometProjectExecs.length == 1, expr) } + } + withParquetTable(doubleValues.flatMap(m => doubleValues.map(n => (m, n))), "tbl") { // expressions with two args - for (expr <- Seq("pow")) { + for (expr <- Seq("atan2", "pow")) { val (_, cometPlan) = checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1, _2) FROM tbl")) val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => @@ -1382,25 +1383,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper { } } - test("atan2 math scalar functions") { - // TODO we should eventually include negative zero but there are known issues still - // https://github.com/apache/datafusion-comet/issues/1897 - val data = doubleValues.filter(n => java.lang.Double.compare(n, -0.0d) == 1).map(n => (n, n)) - withSQLConf(CometConf.getExprAllowIncompatConfigKey(classOf[Atan2]) -> "true") { - withParquetTable(data, "tbl") { - // expressions with two args - for (expr <- Seq("atan2")) { - val (_, cometPlan) = - checkSparkAnswerAndOperatorWithTol(sql(s"SELECT $expr(_1, _2) FROM tbl")) - val cometProjectExecs = collect(cometPlan) { case op: CometProjectExec => - op - } - assert(cometProjectExecs.length == 1, expr) - } - } - } - } - private def testDoubleScalarExpr(expr: String): Unit = { val testValuesRepeated = doubleValues.flatMap(v => Seq.fill(1000)(v)) for (withDictionary <- Seq(true, false)) { From ff6d9639bcd0650cf235976be544c3dcaa5d9aae Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Fri, 3 Apr 2026 11:07:05 -0700 Subject: [PATCH 6/7] address review comments --- docs/source/user-guide/latest/expressions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md index bef31b4095..9c28cb4ddb 100644 --- a/docs/source/user-guide/latest/expressions.md +++ b/docs/source/user-guide/latest/expressions.md @@ -128,7 +128,7 @@ Expressions that are not Spark-compatible will fall back to Spark by default and | Add | `+` | Yes | | | Asin | `asin` | Yes | | | Atan | `atan` | Yes | | -| Atan2 | `atan2` | No | | +| Atan2 | `atan2` | Yes | | | BRound | `bround` | Yes | | | Ceil | `ceil` | Yes | | | Cos | `cos` | Yes | | From d4bb6fffc2d57ca068bfe9c05d66bbb4049e4b72 Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Fri, 3 Apr 2026 11:09:25 -0700 Subject: [PATCH 7/7] address review comments --- spark/src/main/scala/org/apache/comet/serde/math.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/spark/src/main/scala/org/apache/comet/serde/math.scala b/spark/src/main/scala/org/apache/comet/serde/math.scala index a03fa07d82..a01d4cdf9d 100644 --- a/spark/src/main/scala/org/apache/comet/serde/math.scala +++ b/spark/src/main/scala/org/apache/comet/serde/math.scala @@ -30,6 +30,7 @@ object CometAtan2 extends CometExpressionSerde[Atan2] { expr: Atan2, inputs: Seq[Attribute], binding: Boolean): Option[ExprOuterClass.Expr] = { + // Spark adds +0.0 to inputs in order to convert -0.0 to +0.0 val left = Add(expr.left, Literal.default(expr.left.dataType)) val right = Add(expr.right, Literal.default(expr.right.dataType)) val leftExpr = exprToProtoInternal(left, inputs, binding)