From e59ffa681d5fd8de15c3e85a28ea3df6a9ef075b Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Wed, 11 Feb 2026 18:04:24 -0800 Subject: [PATCH 1/3] HIVE-29457: HiveSortExchangePullUpConstantsRule doesn't remove constant column from distribution keys --- .../calcite/HiveRelDistribution.java | 9 ++++-- .../distribution_key_constant_value.q | 7 +++++ .../distribution_key_constant_value.q.out | 30 +++++++++++++++++++ 3 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/distribution_key_constant_value.q create mode 100644 ql/src/test/results/clientpositive/llap/distribution_key_constant_value.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java index 513a3e6d2994..3067654571a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.Map; +import java.util.Objects; import org.apache.calcite.plan.RelMultipleTrait; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTrait; @@ -95,9 +96,11 @@ public RelDistribution apply(TargetMapping mapping) { tmp.put(aMapping.source, aMapping.target); } - for (Integer key : keys) { - newKeys.add(tmp.get(key)); - } + keys.stream() + .map(tmp::get) + .filter(Objects::nonNull) + .forEach(newKeys::add); + return new HiveRelDistribution(type, newKeys); } diff --git a/ql/src/test/queries/clientpositive/distribution_key_constant_value.q b/ql/src/test/queries/clientpositive/distribution_key_constant_value.q new file mode 100644 index 000000000000..86ad2ba45442 --- /dev/null +++ b/ql/src/test/queries/clientpositive/distribution_key_constant_value.q @@ -0,0 +1,7 @@ +CREATE TABLE test (col1 string, col2 string); + +EXPLAIN CBO +SELECT col1 FROM test +WHERE col2 = 'a' +DISTRIBUTE BY col1, col2 +SORT BY col1, col2; diff --git a/ql/src/test/results/clientpositive/llap/distribution_key_constant_value.q.out b/ql/src/test/results/clientpositive/llap/distribution_key_constant_value.q.out new file mode 100644 index 000000000000..96a4f65cb347 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/distribution_key_constant_value.q.out @@ -0,0 +1,30 @@ +PREHOOK: query: CREATE TABLE test (col1 string, col2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test +POSTHOOK: query: CREATE TABLE test (col1 string, col2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test +PREHOOK: query: EXPLAIN CBO +SELECT col1 FROM test +WHERE col2 = 'a' +DISTRIBUTE BY col1, col2 +SORT BY col1, col2 +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN CBO +SELECT col1 FROM test +WHERE col2 = 'a' +DISTRIBUTE BY col1, col2 +SORT BY col1, col2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +CBO PLAN: +HiveSortExchange(distribution=[hash[0]], collation=[[0]]) + HiveProject(col1=[$0]) + HiveFilter(condition=[=($1, _UTF-16LE'a')]) + HiveTableScan(table=[[default, test]], table:alias=[test]) + From 14774e10f01183d4dccbeac08d16262857daca05 Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Thu, 19 Feb 2026 15:15:02 -0800 Subject: [PATCH 2/3] address review comments --- .../optimizer/calcite/HiveRelDistribution.java | 9 +++------ .../rules/HiveSortPullUpConstantsRule.java | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java index 3067654571a6..513a3e6d2994 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelDistribution.java @@ -22,7 +22,6 @@ import java.util.List; import java.util.Map; -import java.util.Objects; import org.apache.calcite.plan.RelMultipleTrait; import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.RelTrait; @@ -96,11 +95,9 @@ public RelDistribution apply(TargetMapping mapping) { tmp.put(aMapping.source, aMapping.target); } - keys.stream() - .map(tmp::get) - .filter(Objects::nonNull) - .forEach(newKeys::add); - + for (Integer key : keys) { + newKeys.add(tmp.get(key)); + } return new HiveRelDistribution(type, newKeys); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortPullUpConstantsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortPullUpConstantsRule.java index 51f53cd0ead1..99b094efa94a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortPullUpConstantsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortPullUpConstantsRule.java @@ -42,6 +42,7 @@ import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.util.Pair; import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelDistribution; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; @@ -94,9 +95,24 @@ private HiveSortExchangePullUpConstantsRule() { @Override protected void buildSort(RelBuilder relBuilder, HiveSortExchange sortNode, Mappings.TargetMapping mapping) { List fieldCollations = applyToFieldCollations(sortNode.getCollation(), mapping); - RelDistribution distribution = sortNode.getDistribution().apply(mapping); + RelDistribution distribution = applyToDistribution(sortNode.getDistribution(), mapping); relBuilder.sortExchange(distribution, RelCollations.of(fieldCollations)); } + + private RelDistribution applyToDistribution( + RelDistribution distribution, Mappings.TargetMapping mapping) { + List newKeys = new ArrayList<>(); + for (int key : distribution.getKeys()) { + final int target = mapping.getTargetOpt(key); + if (target < 0) { + // It is a constant, we can ignore it + continue; + } + newKeys.add(target); + } + + return new HiveRelDistribution(distribution.getType(), newKeys); + } } From 4f077f49cce2bc62b71483afb5a56ca43df4d52c Mon Sep 17 00:00:00 2001 From: Soumyakanti Das Date: Mon, 23 Feb 2026 10:19:47 -0800 Subject: [PATCH 3/3] Remove unnecessary SORT BY clause --- .../distribution_key_constant_value.q | 5 ++--- .../llap/distribution_key_constant_value.q.out | 14 ++++++-------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/ql/src/test/queries/clientpositive/distribution_key_constant_value.q b/ql/src/test/queries/clientpositive/distribution_key_constant_value.q index 86ad2ba45442..5b8823bbf6c6 100644 --- a/ql/src/test/queries/clientpositive/distribution_key_constant_value.q +++ b/ql/src/test/queries/clientpositive/distribution_key_constant_value.q @@ -1,7 +1,6 @@ CREATE TABLE test (col1 string, col2 string); EXPLAIN CBO -SELECT col1 FROM test +SELECT col1, col2 FROM test WHERE col2 = 'a' -DISTRIBUTE BY col1, col2 -SORT BY col1, col2; +DISTRIBUTE BY col1, col2; diff --git a/ql/src/test/results/clientpositive/llap/distribution_key_constant_value.q.out b/ql/src/test/results/clientpositive/llap/distribution_key_constant_value.q.out index 96a4f65cb347..3c078bedea21 100644 --- a/ql/src/test/results/clientpositive/llap/distribution_key_constant_value.q.out +++ b/ql/src/test/results/clientpositive/llap/distribution_key_constant_value.q.out @@ -7,24 +7,22 @@ POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test PREHOOK: query: EXPLAIN CBO -SELECT col1 FROM test +SELECT col1, col2 FROM test WHERE col2 = 'a' -DISTRIBUTE BY col1, col2 -SORT BY col1, col2 +DISTRIBUTE BY col1, col2 PREHOOK: type: QUERY PREHOOK: Input: default@test #### A masked pattern was here #### POSTHOOK: query: EXPLAIN CBO -SELECT col1 FROM test +SELECT col1, col2 FROM test WHERE col2 = 'a' -DISTRIBUTE BY col1, col2 -SORT BY col1, col2 +DISTRIBUTE BY col1, col2 POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### CBO PLAN: -HiveSortExchange(distribution=[hash[0]], collation=[[0]]) - HiveProject(col1=[$0]) +HiveSortExchange(distribution=[hash[0]], collation=[[]]) + HiveProject(col1=[$0], col2=[CAST(_UTF-16LE'a':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"):VARCHAR(2147483647) CHARACTER SET "UTF-16LE"]) HiveFilter(condition=[=($1, _UTF-16LE'a')]) HiveTableScan(table=[[default, test]], table:alias=[test])