@@ -19,32 +19,41 @@ import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, ScalarFu
1919import org .apache .spark .sql .types ._
2020import org .apache .spark .unsafe .types .UTF8String
2121
/**
 * Base class for hash functions that take a variable number of string arguments and
 * return a single `long` hash value.
 *
 * Subclasses provide the function name ([[funcName]]), the equivalent ClickHouse function
 * names ([[ckFuncNames]]) and the hash implementation itself ([[applyHash]]).
 */
abstract class MultiStringArgsHash extends UnboundFunction with ClickhouseEquivFunction {

  /**
   * Computes the hash over all arguments of one invocation.
   *
   * @param input one element per function argument, in argument order; when called from the
   *              function returned by [[bind]], each element is the `Array[Byte]` obtained
   *              from `UTF8String.getBytes`
   * @return the hash value for the given arguments
   */
  def applyHash(input: Array[Any]): Long

  /** Name under which this function is exposed; also used to build the canonical name. */
  protected def funcName: String

  override val ckFuncNames: Array[String]

  override def description: String = s"$name: (value: string, ...) => hash_value: long"

  /** Only string-typed arguments are accepted by [[bind]]. */
  private def isExpectedType(dt: DataType): Boolean =
    dt.isInstanceOf[StringType]

  final override def name: String = funcName

  /**
   * Binds this function to a concrete argument list.
   *
   * @param inputType the argument types, one field per argument
   * @return a scalar function producing a `LongType` result via [[applyHash]]
   * @throws UnsupportedOperationException if any argument is not a string type
   */
  final override def bind(inputType: StructType): BoundFunction = {
    val inputDataTypes = inputType.fields.map(_.dataType)
    if (!inputDataTypes.forall(isExpectedType)) {
      throw new UnsupportedOperationException(s"Expect multiple STRING argument. $description")
    }
    // NOTE(review): `forall` is vacuously true for an empty argument list, so a zero-argument
    // call binds successfully and `applyHash` receives an empty array - confirm that every
    // implementation tolerates this, or reject it here.

    // need to new a ScalarFunction instance for each bind,
    // because we do not know the number of arguments in advance
    new ScalarFunction[Long] {
      override def inputTypes(): Array[DataType] = inputDataTypes
      override def name: String = funcName
      override def canonicalName: String = s"clickhouse.$name"
      override def resultType: DataType = LongType
      override def toString: String = name

      override def produceResult(input: InternalRow): Long = {
        // Called once per row: read every field through the typed accessor and fill a single
        // array, instead of building a type Seq, a value Seq and two intermediate arrays.
        // NOTE(review): a null argument yields a null UTF8String and `getBytes` then throws a
        // NullPointerException (unchanged behaviour) - confirm whether nulls can reach here.
        val argBytes: Array[Any] = Array.tabulate[Any](input.numFields) { i =>
          val value: UTF8String = input.getUTF8String(i)
          value.getBytes
        }
        applyHash(argBytes)
      }
    }
  }

}
0 commit comments