From dd643d493418a35108b0a7e84c258c560594eb64 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Fri, 3 Apr 2026 18:30:58 -0700 Subject: [PATCH 1/3] Wire 6 native scalar functions: ascii, bit_length, chr, translate, replace, date_trunc These functions already had protobuf enum values and native Rust/DataFusion implementations but were missing the Scala converter wiring in NativeConverters.scala. Each is a verified Spark catalyst expression class (not RuntimeReplaceable) available in Spark 3.3+. --- .../org/apache/auron/AuronFunctionSuite.scala | 58 +++++++++++++++++++ .../spark/sql/auron/NativeConverters.scala | 11 ++++ 2 files changed, 69 insertions(+) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala index ef07ce3a3..dd4f3e9de 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala @@ -685,4 +685,62 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { } } } + + test("ascii function") { + withTable("t1") { + sql("create table t1(c1 string) using parquet") + sql("insert into t1 values('A'), ('abc'), (''), (null)") + checkSparkAnswerAndOperator("select ascii(c1) from t1") + } + } + + test("bit_length function") { + withTable("t1") { + sql("create table t1(c1 string) using parquet") + sql("insert into t1 values('hello'), (''), (null), ('世界')") + checkSparkAnswerAndOperator("select bit_length(c1) from t1") + } + } + + test("chr function") { + withTable("t1") { + sql("create table t1(c1 bigint) using parquet") + sql("insert into t1 values(65), (97), (48), (null)") + checkSparkAnswerAndOperator("select chr(c1) from t1") + } + } + + test("translate function") { + withTable("t1") { + sql("create table t1(c1 string) using parquet") + sql("insert into t1 values('AaBbCc'), ('hello'), (''), (null)") + checkSparkAnswerAndOperator("select translate(c1, 'ABC', 'xyz') from t1") + } + } + + test("replace function") { + withTable("t1") { + sql("create table t1(c1 string) using parquet") + sql("insert into t1 values('hello world'), ('aaa'), (''), (null)") + checkSparkAnswerAndOperator("select replace(c1, 'world', 'spark') from t1") + checkSparkAnswerAndOperator("select replace(c1, 'a', '') from t1") + } + } + + test("date_trunc function") { + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + withTable("t1") { + sql("create table t1(c1 timestamp) using parquet") + sql("""insert into t1 values + | (timestamp'2024-03-15 14:30:45'), + | (timestamp'2024-12-31 23:59:59'), + | (null) + |""".stripMargin) + checkSparkAnswerAndOperator("select date_trunc('year', c1) from t1") + checkSparkAnswerAndOperator("select date_trunc('month', c1) from t1") + checkSparkAnswerAndOperator("select date_trunc('day', c1) from t1") + checkSparkAnswerAndOperator("select date_trunc('hour', c1) from t1") + } + } + } } diff --git a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala index 68007c837..dbe31519e 100644 --- a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala +++ b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala @@ -891,6 +891,17 @@ object NativeConverters extends Logging { buildScalarFunction(pb.ScalarFunction.FindInSet, e.children, e.dataType) case e: Abs if e.dataType.isInstanceOf[FloatType] || e.dataType.isInstanceOf[DoubleType] => buildScalarFunction(pb.ScalarFunction.Abs, e.children, e.dataType) + case e: Ascii => buildScalarFunction(pb.ScalarFunction.Ascii, e.children, e.dataType) + case e: BitLength => + buildScalarFunction(pb.ScalarFunction.BitLength, e.children, e.dataType) + case e: Chr => buildScalarFunction(pb.ScalarFunction.Chr, e.children, e.dataType) + case e: StringTranslate => + buildScalarFunction(pb.ScalarFunction.Translate, e.children, e.dataType) + case e: StringReplace => + buildScalarFunction(pb.ScalarFunction.Replace, e.children, e.dataType) + case e: TruncTimestamp => + buildScalarFunction(pb.ScalarFunction.DateTrunc, e.children, e.dataType) + case e: OctetLength => buildScalarFunction(pb.ScalarFunction.OctetLength, e.children, e.dataType) case Length(arg) if arg.dataType == StringType => From cc3f1f5ed015f4aa771153449dad04b7eee41c8e Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Fri, 3 Apr 2026 18:32:40 -0700 Subject: [PATCH 2/3] Remove non-ASCII character from bit_length test data --- .../src/test/scala/org/apache/auron/AuronFunctionSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala index dd4f3e9de..abac3c319 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala @@ -697,7 +697,7 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { test("bit_length function") { withTable("t1") { sql("create table t1(c1 string) using parquet") - sql("insert into t1 values('hello'), (''), (null), ('世界')") + sql("insert into t1 values('hello'), (''), (null), ('longer string')") checkSparkAnswerAndOperator("select bit_length(c1) from t1") } } From acc77248e9ab70e27205559fe5612037ac4c0036 Mon Sep 17 00:00:00 2001 From: ShreyeshArangath Date: Fri, 3 Apr 2026 18:33:24 -0700 Subject: [PATCH 3/3] Use unicode escape for multi-byte character in bit_length test --- .../src/test/scala/org/apache/auron/AuronFunctionSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala index abac3c319..1688ff617 100644 --- a/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala +++ b/spark-extension-shims-spark/src/test/scala/org/apache/auron/AuronFunctionSuite.scala @@ -697,7 +697,7 @@ class AuronFunctionSuite extends AuronQueryTest with BaseAuronSQLSuite { test("bit_length function") { withTable("t1") { sql("create table t1(c1 string) using parquet") - sql("insert into t1 values('hello'), (''), (null), ('longer string')") + sql("insert into t1 values('hello'), (''), (null), ('caf\\u00e9')") checkSparkAnswerAndOperator("select bit_length(c1) from t1") } }