Skip to content

Commit 857ea19

Browse files
committed
Spark 3.4: Optimize sharding key handling when shuffle and sort, amend approach 3
1 parent 2a5f02f commit 857ea19

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

spark-3.4/clickhouse-spark/src/main/scala/xenon/clickhouse/write/WriteJobDescription.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,12 @@ case class WriteJobDescription(
7070
// front for all tasks, resulting in instant high pressure for shard 1 when stage starts.
7171
if (writeOptions.repartitionByPartition) {
7272
ExprUtils(functionRegistry).toSparkSplits(
73-
shardingKeyIgnoreRand.map(k => ExprUtils.toSplitWithModulo(k, cluster.get.totalWeight * 10)),
73+
shardingKeyIgnoreRand.map(k => ExprUtils.toSplitWithModulo(k, cluster.get.totalWeight * 5)),
7474
partitionKey
7575
)
7676
} else {
7777
ExprUtils(functionRegistry).toSparkSplits(
78-
shardingKeyIgnoreRand.map(k => ExprUtils.toSplitWithModulo(k, cluster.get.totalWeight * 10)),
78+
shardingKeyIgnoreRand.map(k => ExprUtils.toSplitWithModulo(k, cluster.get.totalWeight * 5)),
7979
None
8080
)
8181
}

0 commit comments

Comments
 (0)