@@ -16,21 +16,18 @@ package org.apache.spark.sql.clickhouse
1616
1717import org .apache .spark .sql .AnalysisException
1818import org .apache .spark .sql .catalyst .SQLConfHelper
19- import org .apache .spark .sql .catalyst .analysis .NoSuchFunctionException
20- import org .apache .spark .sql .catalyst .expressions .{BoundReference , Expression , TransformExpression }
19+ import org .apache .spark .sql .catalyst .expressions .{BoundReference , Expression }
2120import org .apache .spark .sql .clickhouse .ClickHouseSQLConf .IGNORE_UNSUPPORTED_TRANSFORM
22- import org .apache .spark .sql .connector .catalog .Identifier
23- import org .apache .spark .sql .connector .catalog .functions .{BoundFunction , ScalarFunction , UnboundFunction }
2421import org .apache .spark .sql .connector .expressions .Expressions ._
2522import org .apache .spark .sql .connector .expressions .{Expression => V2Expression , _ }
2623import org .apache .spark .sql .types .{IntegerType , LongType , StructField , StructType }
2724import xenon .clickhouse .exception .CHClientException
2825import xenon .clickhouse .expr ._
29- import xenon .clickhouse .func .FunctionRegistry
3026
27+ import scala .annotation .tailrec
3128import scala .util .{Failure , Success , Try }
3229
33- class ExprUtils ( functionRegistry : FunctionRegistry ) extends SQLConfHelper with Serializable {
30+ object ExprUtils extends SQLConfHelper {
3431
3532 def toSparkPartitions (partitionKey : Option [List [Expr ]]): Array [Transform ] =
3633 partitionKey.seq.flatten.flatten(toSparkTransformOpt).toArray
@@ -50,28 +47,7 @@ class ExprUtils(functionRegistry: FunctionRegistry) extends SQLConfHelper with S
5047 toSparkTransformOpt(expr).map(trans => Expressions .sort(trans, direction, nullOrder))
5148 }.toArray
5249
53- private def loadV2FunctionOpt (
54- name : String ,
55- args : Seq [Expression ]
56- ): Option [BoundFunction ] = {
57- def loadFunction (ident : Identifier ): UnboundFunction =
58- functionRegistry.load(ident.name).getOrElse(throw new NoSuchFunctionException (ident))
59- val inputType = StructType (args.zipWithIndex.map {
60- case (exp, pos) => StructField (s " _ $pos" , exp.dataType, exp.nullable)
61- })
62- try {
63- val unbound = loadFunction(Identifier .of(Array .empty, name))
64- Some (unbound.bind(inputType))
65- } catch {
66- case e : NoSuchFunctionException =>
67- throw e
68- case _ : UnsupportedOperationException if conf.getConf(IGNORE_UNSUPPORTED_TRANSFORM ) =>
69- None
70- case e : UnsupportedOperationException =>
71- throw new AnalysisException (e.getMessage, cause = Some (e))
72- }
73- }
74-
50+ @ tailrec
7551 def toCatalyst (v2Expr : V2Expression , fields : Array [StructField ]): Expression =
7652 v2Expr match {
7753 case IdentityTransform (ref) => toCatalyst(ref, fields)
@@ -81,15 +57,8 @@ class ExprUtils(functionRegistry: FunctionRegistry) extends SQLConfHelper with S
8157 .find { case (field, _) => field.name == ref.fieldNames.head }
8258 .getOrElse(throw CHClientException (s " Invalid field reference: $ref" ))
8359 BoundReference (ordinal, field.dataType, field.nullable)
84- case t : Transform =>
85- val catalystArgs = t.arguments().map(toCatalyst(_, fields))
86- loadV2FunctionOpt(t.name(), catalystArgs).map { bound =>
87- TransformExpression (bound, catalystArgs)
88- }.getOrElse {
89- throw CHClientException (s " Unsupported expression: $v2Expr" )
90- }
9160 case _ => throw CHClientException (
92- s " Unsupported expression: $v2Expr"
61+ s " Unsupported V2 expression: $v2Expr, SPARK-33779: Spark 3.3 only support IdentityTransform "
9362 )
9463 }
9564
@@ -114,10 +83,10 @@ class ExprUtils(functionRegistry: FunctionRegistry) extends SQLConfHelper with S
11483 case FuncExpr (" toYYYYMMDD" , List (FieldRef (col))) => days(col)
11584 case FuncExpr (" toHour" , List (FieldRef (col))) => hours(col)
11685 case FuncExpr (" HOUR" , List (FieldRef (col))) => hours(col)
86+ // TODO support arbitrary functions
87+ // case FuncExpr("xxHash64", List(FieldRef(col))) => apply("ck_xx_hash64", column(col))
11788 case FuncExpr (" rand" , Nil ) => apply(" rand" )
11889 case FuncExpr (" toYYYYMMDD" , List (FuncExpr (" toDate" , List (FieldRef (col))))) => identity(col)
119- case FuncExpr (funName, List (FieldRef (col))) if functionRegistry.clickHouseToSparkFunc.contains(funName) =>
120- apply(functionRegistry.clickHouseToSparkFunc(funName), column(col))
12190 case unsupported => throw CHClientException (s " Unsupported ClickHouse expression: $unsupported" )
12291 }
12392
@@ -127,8 +96,7 @@ class ExprUtils(functionRegistry: FunctionRegistry) extends SQLConfHelper with S
12796 case DaysTransform (FieldReference (Seq (col))) => FuncExpr (" toYYYYMMDD" , List (FieldRef (col)))
12897 case HoursTransform (FieldReference (Seq (col))) => FuncExpr (" toHour" , List (FieldRef (col)))
12998 case IdentityTransform (fieldRefs) => FieldRef (fieldRefs.describe)
130- case ApplyTransform (name, args) if functionRegistry.sparkToClickHouseFunc.contains(name) =>
131- FuncExpr (functionRegistry.sparkToClickHouseFunc(name), args.map(arg => SQLExpr (arg.describe())).toList)
99+ case ApplyTransform (name, args) => FuncExpr (name, args.map(arg => SQLExpr (arg.describe())).toList)
132100 case bucket : BucketTransform => throw CHClientException (s " Bucket transform not support yet: $bucket" )
133101 case other : Transform => throw CHClientException (s " Unsupported transform: $other" )
134102 }
@@ -145,18 +113,8 @@ class ExprUtils(functionRegistry: FunctionRegistry) extends SQLConfHelper with S
145113 case IdentityTransform (FieldReference (Seq (col))) => primarySchema.find(_.name == col)
146114 .orElse(secondarySchema.find(_.name == col))
147115 .getOrElse(throw CHClientException (s " Invalid partition column: $col" ))
148- case t @ ApplyTransform (transformName, _) =>
149- val resType =
150- functionRegistry.load(transformName).getOrElse(throw new NoSuchFunctionException (transformName)) match {
151- case f : ScalarFunction [_] => f.resultType()
152- case other => throw CHClientException (s " Unsupported function: $other" )
153- }
154- StructField (t.toString, resType)
116+ case ckXxhHash64 @ ApplyTransform (" ck_xx_hash64" , _) => StructField (ckXxhHash64.toString, LongType )
155117 case bucket : BucketTransform => throw CHClientException (s " Bucket transform not support yet: $bucket" )
156118 case other : Transform => throw CHClientException (s " Unsupported transform: $other" )
157119 }
158120}
159-
160- object ExprUtils {
161- def apply (functionRegistry : FunctionRegistry ): ExprUtils = new ExprUtils (functionRegistry)
162- }
0 commit comments