Skip to content

Commit 4ea03f8

Browse files
Yxang and pan3793 authored
Core: Add Java implementation of several ClickHouse hash function (#261)
* Clickhouse-core: add hash utils and cityhash64 * Clickhouse-core: add murmurhash functions * Clickhouse-core: Fix scala 2.13 build issue * Clickhouse-core: Util -> HashUtils * Licences * Core: Fix HashSuite Test position issue * Core: Fix hash testing issue, update 32-bit hash returns Int instead of Long * Update clickhouse-core/src/main/scala/xenon/clickhouse/hash/HashFunc.scala Add support for raw byte array for executeAny Co-authored-by: Cheng Pan <pan3793@gmail.com> * Core: Fix HashFunc dependency and add comment for Array[Byte] * Core: amend comment * amend testing code --------- Co-authored-by: Cheng Pan <pan3793@gmail.com>
1 parent 2f05c39 commit 4ea03f8

File tree

12 files changed

+742
-0
lines changed

12 files changed

+742
-0
lines changed

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ project(':clickhouse-core') {
151151

152152
api "org.slf4j:slf4j-api:$slf4j_version"
153153
api "org.apache.commons:commons-lang3:$commons_lang3_version"
154+
api "commons-codec:commons-codec:$commons_codec_version"
154155

155156
api "com.fasterxml.jackson.core:jackson-databind:$jackson_version"
156157
api "com.fasterxml.jackson.datatype:jackson-datatype-jsr310:$jackson_version"
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/*
2+
* Licensed under the Apache License, Version 2.0 (the "License");
3+
* you may not use this file except in compliance with the License.
4+
* You may obtain a copy of the License at
5+
*
6+
* https://www.apache.org/licenses/LICENSE-2.0
7+
*
8+
* Unless required by applicable law or agreed to in writing, software
9+
* distributed under the License is distributed on an "AS IS" BASIS,
10+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
* See the License for the specific language governing permissions and
12+
* limitations under the License.
13+
*/
14+
15+
package xenon.clickhouse
16+
17+
import xenon.clickhouse.base.ClickHouseSingleMixIn
18+
import xenon.clickhouse.client.NodeClient
19+
import xenon.clickhouse.hash._
20+
21+
/**
 * Checks the JVM implementations of ClickHouse hash functions against a running
 * ClickHouse node: each test feeds the same string arguments to the server-side
 * function and to the local implementation and asserts that the results agree.
 */
class HashSuite extends ClickHouseSingleMixIn with Logging {

  /**
   * Runs `SELECT toInt64(<clickhouseFunName>(<clickhouseInputExpr>))` on the server and
   * asserts that the single returned value equals `fun(testInput)`.
   *
   * @param client              client used to run the query
   * @param fun                 local hash implementation under test
   * @param testInput           arguments passed to `fun`
   * @param clickhouseFunName   name of the ClickHouse function to call
   * @param clickhouseInputExpr SQL argument list for the ClickHouse function; it is
   *                            spliced into the query verbatim
   */
  def testHash(
    client: NodeClient,
    fun: Array[Any] => Long,
    testInput: Array[Any],
    clickhouseFunName: String,
    clickhouseInputExpr: String
  ): Unit = {
    val sql = s"SELECT toInt64($clickhouseFunName($clickhouseInputExpr))"
    val output = client.syncQueryAndCheckOutputJSONCompactEachRowWithNamesAndTypes(sql)
    // The query selects exactly one expression, so expect one row with one column.
    assert(output.rows === 1L)
    val row = output.records.head
    assert(row.length === 1L)
    // Parse the server's answer once; a non-numeric answer fails the test with a
    // readable message instead of surfacing a raw NumberFormatException.
    val text = row.head.asText
    val actual: Long =
      try text.toLong
      catch {
        case _: NumberFormatException =>
          fail(s"clickhouse function's return should be a long, but got $text")
      }
    val expected = fun(testInput)
    assert(actual === expected)
  }

  // Sample strings: ASCII, CJK, an emoji flag, and one long mixed-script text.
  // NOTE: elements are inlined below as single-quoted SQL literals without any
  // escaping, so they must not contain single quotes.
  val testElement: Array[Any] = Array(
    "spark-clickhouse-connector",
    "Apache Spark",
    "ClickHouse",
    "Yandex",
    "热爱",
    "🇨🇳",
    "This is a long test text. 在传统的行式数据库系统中,数据按如下顺序存储。🇨🇳" * 5
  )

  // Every combination of one, two and three sample strings, so the hash functions
  // are exercised with one to three arguments.
  val testInputs: Array[Array[Any]] =
    (testElement.combinations(1) ++ testElement.combinations(2) ++ testElement.combinations(3)).toArray

  /**
   * Compares `fun` with the ClickHouse function `clickhouseFunName` on every entry of
   * `testInputs`, reusing a single node client for all queries.
   */
  private def checkAgainstClickHouse(clickhouseFunName: String)(fun: Array[Any] => Long): Unit =
    withNodeClient() { client =>
      testInputs.foreach { testInput =>
        // Render the arguments as a comma-separated list of quoted SQL string literals.
        val clickhouseInputExpr = testInput.mkString("'", "', '", "'")
        testHash(client, fun, testInput, clickhouseFunName, clickhouseInputExpr)
      }
    }

  test("CityHash64 Java implementation") {
    checkAgainstClickHouse("cityHash64")(x => CityHash64(x))
  }

  test("Murmurhash2_32 Java implementation") {
    // NOTE(review): toUInt32 presumably widens the 32-bit result to its unsigned value
    // so it can be compared with ClickHouse's UInt32 output; confirm against HashUtils.
    checkAgainstClickHouse("murmurHash2_32")(x => HashUtils.toUInt32(Murmurhash2_32(x)))
  }

  test("Murmurhash2_64 Java implementation") {
    checkAgainstClickHouse("murmurHash2_64")(x => Murmurhash2_64(x))
  }

  test("Murmurhash3_32 Java implementation") {
    // NOTE(review): same unsigned widening assumption as for murmurHash2_32.
    checkAgainstClickHouse("murmurHash3_32")(x => HashUtils.toUInt32(Murmurhash3_32(x)))
  }

  test("Murmurhash3_64 Java implementation") {
    checkAgainstClickHouse("murmurHash3_64")(x => Murmurhash3_64(x))
  }
}

0 commit comments

Comments
 (0)