From 533bce51f6c919052c11268a1314ba727cc07042 Mon Sep 17 00:00:00 2001
From: Konstantin Bereznyakov
Date: Fri, 20 Feb 2026 16:12:02 -0800
Subject: [PATCH 1/3] HIVE-29473: preventing stats override of select columns
 with 2+ LVs

---
 .../annotation/StatsRulesProcFactory.java     |  36 ++-
 .../annotate_stats_lateral_view_join.q.out    | 216 +++++++++---------
 .../llap/lateral_view_noalias.q.out           |   2 -
 .../clientpositive/llap/udtf_explode.q.out    |   4 -
 .../results/clientpositive/llap/union26.q.out |  22 +-
 .../llap/union_lateralview.q.out              |  16 +-
 .../results/clientpositive/nonmr_fetch.q.out  |   8 +-
 7 files changed, 165 insertions(+), 139 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index 42d62e0a64e1..e5b0ae3f755c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -3051,15 +3051,47 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       final Map<String, ExprNodeDesc> columnExprMap = lop.getColumnExprMap();
       final RowSchema schema = lop.getSchema();
 
+      int numSelectCols = parents.get(LateralViewJoinOperator.SELECT_TAG).getSchema().getSignature().size();
+
+      List<ColumnInfo> selectColInfos = new ArrayList<>(numSelectCols);
+      Map<String, ExprNodeDesc> selectExprMap = new HashMap<>();
+
+      List<ColumnInfo> udtfColInfos = new ArrayList<>(Math.max(0, schema.getSignature().size() - numSelectCols));
+      Map<String, ExprNodeDesc> udtfExprMap = new HashMap<>();
+
+      // Strictly isolate both the Schema and Expression Map by parent
+      List<ColumnInfo> signature = schema.getSignature();
+      for (int i = 0; i < signature.size(); i++) {
+        ColumnInfo ci = signature.get(i);
+        String internalName = ci.getInternalName();
+
+        if (i < numSelectCols) {
+          selectColInfos.add(ci);
+          if (columnExprMap.containsKey(internalName)) {
+            selectExprMap.put(internalName, columnExprMap.get(internalName));
+          }
+        } else {
+          udtfColInfos.add(ci);
+          if (columnExprMap.containsKey(internalName)) {
+            udtfExprMap.put(internalName, columnExprMap.get(internalName));
+          }
+        }
+      }
+
+      RowSchema selectSchema = new RowSchema(selectColInfos);
+      RowSchema udtfSchema = new RowSchema(udtfColInfos);
+
+      // Select branch stats
       joinedStats.updateColumnStatsState(selectStats.getColumnStatsState());
       final List<ColStatistics> selectColStats = StatsUtils
-          .getColStatisticsFromExprMap(conf, selectStats, columnExprMap, schema);
+          .getColStatisticsFromExprMap(conf, selectStats, selectExprMap, selectSchema);
       StatsUtils.scaleColStatistics(selectColStats, factor);
       joinedStats.addToColumnStats(selectColStats);
 
+      // UDTF branch stats
       joinedStats.updateColumnStatsState(udtfStats.getColumnStatsState());
       final List<ColStatistics> udtfColStats = StatsUtils
-          .getColStatisticsFromExprMap(conf, udtfStats, columnExprMap, schema);
+          .getColStatisticsFromExprMap(conf, udtfStats, udtfExprMap, udtfSchema);
       joinedStats.addToColumnStats(udtfColStats);
     }
 
diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_lateral_view_join.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_lateral_view_join.q.out
index f8b3838ce51d..2f53aab463d0 100644
--- a/ql/src/test/results/clientpositive/llap/annotate_stats_lateral_view_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/annotate_stats_lateral_view_join.q.out
@@ -233,10 +233,10 @@ STAGE PLANS:
           Select Operator
             expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE
table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -254,10 +254,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -284,10 +284,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -305,10 +305,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator 
compressed: false - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -367,17 +367,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3000 Data size: 600000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 468000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -391,14 +391,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3000 Data size: 600000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 468000 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -418,17 +418,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3000 Data size: 600000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 468000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -442,14 +442,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3000 Data size: 600000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 468000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -478,17 +478,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3000 Data size: 600000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 468000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: 
Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -502,14 +502,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3000 Data size: 600000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 468000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -529,17 +529,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 300 Data size: 45600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 300 Data size: 32400 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator 
outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3000 Data size: 600000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 468000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -553,14 +553,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3000 Data size: 600000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 468000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3000 Data size: 324000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -761,10 +761,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -782,10 +782,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -812,10 +812,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 
Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -833,10 +833,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -895,17 +895,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -919,14 +919,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -946,17 +946,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -970,14 +970,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1006,17 +1006,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator 
outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1030,14 +1030,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1057,17 +1057,17 @@ STAGE PLANS: Select Operator 
expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1081,14 +1081,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 132 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data 
size: 152 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1289,10 +1289,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1310,10 +1310,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1340,10 +1340,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1361,10 +1361,10 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1423,17 +1423,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 456 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1447,14 +1447,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1474,17 +1474,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 600 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1498,14 +1498,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1534,17 +1534,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) 
outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1558,14 +1558,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 
324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1585,17 +1585,17 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int) outputColumnNames: _col0, _col1, _col6, _col7 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1609,14 +1609,14 @@ STAGE PLANS: function name: explode Lateral View Join Operator outputColumnNames: _col0, _col1, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, 
_col4 - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 324 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/lateral_view_noalias.q.out b/ql/src/test/results/clientpositive/llap/lateral_view_noalias.q.out index a06607e78bf0..fc2801bcd898 100644 --- a/ql/src/test/results/clientpositive/llap/lateral_view_noalias.q.out +++ b/ql/src/test/results/clientpositive/llap/lateral_view_noalias.q.out @@ -294,5 +294,3 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@lv_noalias POSTHOOK: Input: default@src #### A masked pattern was here #### -key1 100 key1 100 -key2 200 key2 200 diff --git a/ql/src/test/results/clientpositive/llap/udtf_explode.q.out b/ql/src/test/results/clientpositive/llap/udtf_explode.q.out index 94d3cd930a48..f1c47f4452ae 100644 --- a/ql/src/test/results/clientpositive/llap/udtf_explode.q.out +++ b/ql/src/test/results/clientpositive/llap/udtf_explode.q.out @@ -240,8 +240,6 @@ POSTHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1, 2, 3)) A POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -1 1 -2 1 3 1 PREHOOK: query: EXPLAIN SELECT explode(map(1, 'one', 2, 'two', 3, 'three')) as (myKey, myVal) FROM src ORDER BY myKey, myVal LIMIT 3 PREHOOK: type: QUERY @@ -522,8 +520,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 1 one 1 -2 two 1 -3 three 1 PREHOOK: query: SELECT src.key, myCol FROM src lateral view explode(array(1, 2, 3)) x AS myCol ORDER BY src.key, myCol LIMIT 3 PREHOOK: type: QUERY PREHOOK: 
Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/union26.q.out b/ql/src/test/results/clientpositive/llap/union26.q.out index 234d738e16aa..5ea763c0f256 100644 --- a/ql/src/test/results/clientpositive/llap/union26.q.out +++ b/ql/src/test/results/clientpositive/llap/union26.q.out @@ -129,20 +129,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 115500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Select Operator expressions: array(1,2,3) (type: array) @@ -157,20 +157,20 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 115500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string), _col1 (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -191,13 +191,13 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -207,14 +207,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - 
Statistics: Num rows: 645 Data size: 154155 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 645 Data size: 119970 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/union_lateralview.q.out b/ql/src/test/results/clientpositive/llap/union_lateralview.q.out index 3f3a89cda01b..a612a4c198ec 100644 --- a/ql/src/test/results/clientpositive/llap/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/llap/union_lateralview.q.out @@ -90,13 +90,13 @@ STAGE PLANS: Select Operator expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 231000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1000 Data size: 231000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Select Operator expressions: Const array [1, 2, 3] (type: array) @@ -111,13 +111,13 @@ STAGE PLANS: Select Operator expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 231000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1000 Data size: 231000 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: all inputs @@ -143,13 +143,13 @@ STAGE PLANS: Select Operator expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 231000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1000 Data size: 231000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Select Operator expressions: Const array [1, 2, 3] (type: array) @@ -164,13 +164,13 @@ STAGE PLANS: Select Operator expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 231000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1000 Data size: 231000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/nonmr_fetch.q.out b/ql/src/test/results/clientpositive/nonmr_fetch.q.out index 0a59ffedb2b8..40c9b89793b9 100644 --- 
a/ql/src/test/results/clientpositive/nonmr_fetch.q.out +++ b/ql/src/test/results/clientpositive/nonmr_fetch.q.out @@ -1081,11 +1081,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 1003500 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 40080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 40080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink Select Operator expressions: array(_col0,_col1) (type: array) @@ -1099,11 +1099,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 1003500 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 40080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col8 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 40080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: select key,X from srcpart lateral view explode(array(key,value)) L as x where (ds='2008-04-08' AND hr='11') limit 20 From 9f854a3f952fe57c415e96f95890bdd9e925a8cf Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Sun, 22 Feb 2026 12:14:38 -0800 Subject: [PATCH 2/3] HIVE-29473: better use of existing methods/libraries, unit testing and corrected.out files --- .../annotation/StatsRulesProcFactory.java | 34 +- .../TestLateralViewJoinStatsRule.java | 385 ++++++++++++++++++ .../llap/lateral_view_noalias.q.out | 2 + .../clientpositive/llap/udtf_explode.q.out 
| 4 + .../llap/vector_windowing.q.out | 8 +- .../clientpositive/tez/tez_union_udtf.q.out | 10 +- 6 files changed, 410 insertions(+), 33 deletions(-) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestLateralViewJoinStatsRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index e5b0ae3f755c..26a719489afb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -3049,38 +3049,24 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, if (satisfyPrecondition(selectStats) && satisfyPrecondition(udtfStats)) { final Map columnExprMap = lop.getColumnExprMap(); - final RowSchema schema = lop.getSchema(); + final List signature = lop.getSchema().getSignature(); + final int numSelColumns = lop.getConf().getNumSelColumns(); - int numSelectCols = parents.get(LateralViewJoinOperator.SELECT_TAG).getSchema().getSignature().size(); + // Split schemas using subList + RowSchema selectSchema = new RowSchema(new ArrayList<>(signature.subList(0, numSelColumns))); + RowSchema udtfSchema = new RowSchema(new ArrayList<>(signature.subList(numSelColumns, signature.size()))); - List selectColInfos = new ArrayList<>(numSelectCols); + // Filter expression maps to avoid cross-contamination in getColStatisticsFromExprMap Map selectExprMap = new HashMap<>(); - - List udtfColInfos = new ArrayList<>(Math.max(0, schema.getSignature().size() - numSelectCols)); Map udtfExprMap = new HashMap<>(); - - // Strictly isolate both the Schema and Expression Map by parent - List signature = schema.getSignature(); for (int i = 0; i < signature.size(); i++) { - ColumnInfo ci = signature.get(i); - String internalName = ci.getInternalName(); - - if (i < 
numSelectCols) { - selectColInfos.add(ci); - if (columnExprMap.containsKey(internalName)) { - selectExprMap.put(internalName, columnExprMap.get(internalName)); - } - } else { - udtfColInfos.add(ci); - if (columnExprMap.containsKey(internalName)) { - udtfExprMap.put(internalName, columnExprMap.get(internalName)); - } + String name = signature.get(i).getInternalName(); + ExprNodeDesc expr = columnExprMap.get(name); + if (expr != null) { + (i < numSelColumns ? selectExprMap : udtfExprMap).put(name, expr); } } - RowSchema selectSchema = new RowSchema(selectColInfos); - RowSchema udtfSchema = new RowSchema(udtfColInfos); - // Select branch stats joinedStats.updateColumnStatsState(selectStats.getColumnStatsState()); final List selectColStats = StatsUtils diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestLateralViewJoinStatsRule.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestLateralViewJoinStatsRule.java new file mode 100644 index 000000000000..141792113b61 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestLateralViewJoinStatsRule.java @@ -0,0 +1,385 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.stats.annotation; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.Statistics; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.junit.Test; +import org.mockito.stubbing.Answer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Stack; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Tests for LateralViewJoinStatsRule - specifically HIVE-29473 fix. + * + * The bug: In nested lateral views, column name collisions (e.g., _col0 appearing + * in both SELECT and UDTF branches) cause the UDTF column's NDV to overwrite + * the SELECT column's NDV, leading to incorrect CBO estimates. + */ +public class TestLateralViewJoinStatsRule { + + /** + * Tests that column stats from SELECT and UDTF branches are properly isolated + * when processing LateralViewJoinOperator statistics. + * + * Scenario: SELECT branch has a column with NDV=2, UDTF branch has a column with NDV=6. + * After stats annotation, the SELECT column should retain NDV=2. 
+ */ + @Test + public void testColumnStatsIsolation() throws Exception { + // SELECT parent: 1 column "_col0" with NDV=2 + Operator selectParent = createMockParentOperator( + "_col0", 2, 100); + + // UDTF parent: 1 column "_col1" with NDV=6 + Operator udtfParent = createMockParentOperator( + "_col1", 6, 100); + + // LVJ has 2 columns: _col0 (from select), _col1 (from udtf) + List lvjSignature = Arrays.asList( + new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "", false), + new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "", false)); + + Map colExprMap = new HashMap<>(); + colExprMap.put("_col0", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false)); + colExprMap.put("_col1", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col1", "", false)); + + // Capture the statistics set on the LVJ operator + final Statistics[] capturedStats = new Statistics[1]; + LateralViewJoinOperator lvj = createMockLVJOperator( + selectParent, udtfParent, lvjSignature, colExprMap, 1, capturedStats); + + // Run the stats rule + AnnotateStatsProcCtx ctx = createAnnotateStatsProcCtx(); + StatsRulesProcFactory.LateralViewJoinStatsRule rule = + new StatsRulesProcFactory.LateralViewJoinStatsRule(); + rule.process(lvj, new Stack<>(), ctx); + + // Verify results + assertNotNull("Statistics should be set on LVJ", capturedStats[0]); + + ColStatistics selectColStats = capturedStats[0].getColumnStatisticsFromColName("_col0"); + assertNotNull("Should have stats for _col0", selectColStats); + assertEquals("SELECT column _col0 should have NDV=2", 2, selectColStats.getCountDistint()); + + ColStatistics udtfColStats = capturedStats[0].getColumnStatisticsFromColName("_col1"); + assertNotNull("Should have stats for _col1", udtfColStats); + assertEquals("UDTF column _col1 should have NDV=6", 6, udtfColStats.getCountDistint()); + } + + /** + * Tests the specific bug scenario from HIVE-29473: nested lateral views where + * internal column names collide (both sides 
have _col0). + * + * Before fix: UDTF's _col0 NDV would overwrite SELECT's _col0 NDV. + * After fix: Each branch's stats are isolated. + */ + @Test + public void testNestedLateralViewNameCollision() throws Exception { + // Simulate nested LV scenario where both branches internally use _col0 + // SELECT parent: _col0 with NDV=2 (e.g., grouping column 'id') + Operator selectParent = createMockParentOperator( + "_col0", 2, 100); + + // UDTF parent: _col0 with NDV=6 (e.g., from explode of column with 6 distinct values) + Operator udtfParent = createMockParentOperator( + "_col0", 6, 100); + + // LVJ output schema: both columns exist but with different output names + // In real scenario, LVJ renames to avoid collision, but the columnExprMap + // still references the original _col0 from each parent + List lvjSignature = Arrays.asList( + new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "", false), + new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "", false)); + + // columnExprMap: output _col0 -> select's _col0, output _col1 -> udtf's _col0 + Map colExprMap = new HashMap<>(); + colExprMap.put("_col0", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false)); + colExprMap.put("_col1", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false)); + + final Statistics[] capturedStats = new Statistics[1]; + LateralViewJoinOperator lvj = createMockLVJOperator( + selectParent, udtfParent, lvjSignature, colExprMap, 1, capturedStats); + + // Run the stats rule + AnnotateStatsProcCtx ctx = createAnnotateStatsProcCtx(); + StatsRulesProcFactory.LateralViewJoinStatsRule rule = + new StatsRulesProcFactory.LateralViewJoinStatsRule(); + rule.process(lvj, new Stack<>(), ctx); + + // The key assertion: _col0 (from SELECT) should have NDV=2, NOT 6 + assertNotNull("Statistics should be set", capturedStats[0]); + ColStatistics col0Stats = capturedStats[0].getColumnStatisticsFromColName("_col0"); + assertNotNull("Should have stats for _col0", 
col0Stats); + + // This is the bug fix verification - before fix, this would be 6 (contaminated by UDTF) + assertEquals("SELECT's _col0 should retain NDV=2, not be overwritten by UDTF's NDV=6", + 2, col0Stats.getCountDistint()); + } + + /** + * Tests that columns missing from columnExprMap are handled gracefully. + * The fix has a null check: if (expr != null) - this tests that branch. + */ + @Test + public void testMissingColumnExprMapEntry() throws Exception { + Operator selectParent = createMockParentOperator( + "_col0", 2, 100); + Operator udtfParent = createMockParentOperator( + "_col1", 6, 100); + + List lvjSignature = Arrays.asList( + new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "", false), + new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "", false), + new ColumnInfo("_col2", TypeInfoFactory.stringTypeInfo, "", false)); + + // Only provide expr for _col0 and _col1, _col2 is missing + Map colExprMap = new HashMap<>(); + colExprMap.put("_col0", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false)); + colExprMap.put("_col1", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col1", "", false)); + + final Statistics[] capturedStats = new Statistics[1]; + LateralViewJoinOperator lvj = createMockLVJOperator( + selectParent, udtfParent, lvjSignature, colExprMap, 1, capturedStats); + + AnnotateStatsProcCtx ctx = createAnnotateStatsProcCtx(); + StatsRulesProcFactory.LateralViewJoinStatsRule rule = + new StatsRulesProcFactory.LateralViewJoinStatsRule(); + rule.process(lvj, new Stack<>(), ctx); + + assertNotNull("Statistics should be set", capturedStats[0]); + assertEquals("SELECT column should have NDV=2", 2, + capturedStats[0].getColumnStatisticsFromColName("_col0").getCountDistint()); + assertEquals("UDTF column should have NDV=6", 6, + capturedStats[0].getColumnStatisticsFromColName("_col1").getCountDistint()); + } + + /** + * Tests multiple columns per branch with name collision on one of them. 
+ */ + @Test + public void testMultipleColumnsWithPartialCollision() throws Exception { + // SELECT has _col0 (NDV=2) and _col1 (NDV=10) + Operator selectParent = createMockParentOperatorMultiCol( + Arrays.asList("_col0", "_col1"), Arrays.asList(2L, 10L), 100); + + // UDTF has _col0 (NDV=50) - collides with SELECT's _col0 + Operator udtfParent = createMockParentOperator( + "_col0", 50, 100); + + // LVJ output: _col0, _col1 from SELECT, _col2 from UDTF + List lvjSignature = Arrays.asList( + new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "", false), + new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "", false), + new ColumnInfo("_col2", TypeInfoFactory.stringTypeInfo, "", false)); + + Map colExprMap = new HashMap<>(); + colExprMap.put("_col0", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false)); + colExprMap.put("_col1", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col1", "", false)); + colExprMap.put("_col2", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false)); + + final Statistics[] capturedStats = new Statistics[1]; + LateralViewJoinOperator lvj = createMockLVJOperator( + selectParent, udtfParent, lvjSignature, colExprMap, 2, capturedStats); + + AnnotateStatsProcCtx ctx = createAnnotateStatsProcCtx(); + StatsRulesProcFactory.LateralViewJoinStatsRule rule = + new StatsRulesProcFactory.LateralViewJoinStatsRule(); + rule.process(lvj, new Stack<>(), ctx); + + assertNotNull("Statistics should be set", capturedStats[0]); + assertEquals("SELECT _col0 should retain NDV=2", 2, + capturedStats[0].getColumnStatisticsFromColName("_col0").getCountDistint()); + assertEquals("SELECT _col1 should have NDV=10", 10, + capturedStats[0].getColumnStatisticsFromColName("_col1").getCountDistint()); + assertEquals("UDTF _col2 should have NDV=50", 50, + capturedStats[0].getColumnStatisticsFromColName("_col2").getCountDistint()); + } + + /** + * Tests edge case: only SELECT columns (numSelColumns = 
signature.size()). + */ + @Test + public void testOnlySelectColumns() throws Exception { + Operator selectParent = createMockParentOperator( + "_col0", 5, 100); + Operator udtfParent = createMockParentOperator( + "_col1", 10, 100); + + // LVJ has only 1 column from SELECT + List lvjSignature = Arrays.asList( + new ColumnInfo("_col0", TypeInfoFactory.stringTypeInfo, "", false)); + + Map colExprMap = new HashMap<>(); + colExprMap.put("_col0", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col0", "", false)); + + final Statistics[] capturedStats = new Statistics[1]; + LateralViewJoinOperator lvj = createMockLVJOperator( + selectParent, udtfParent, lvjSignature, colExprMap, 1, capturedStats); + + AnnotateStatsProcCtx ctx = createAnnotateStatsProcCtx(); + StatsRulesProcFactory.LateralViewJoinStatsRule rule = + new StatsRulesProcFactory.LateralViewJoinStatsRule(); + rule.process(lvj, new Stack<>(), ctx); + + assertNotNull("Statistics should be set", capturedStats[0]); + assertEquals("SELECT column should have NDV=5", 5, + capturedStats[0].getColumnStatisticsFromColName("_col0").getCountDistint()); + } + + /** + * Tests edge case: only UDTF columns (numSelColumns = 0). 
+ */ + @Test + public void testOnlyUdtfColumns() throws Exception { + Operator selectParent = createMockParentOperator( + "_col0", 5, 100); + Operator udtfParent = createMockParentOperator( + "_col1", 10, 100); + + // LVJ has only 1 column from UDTF + List lvjSignature = Arrays.asList( + new ColumnInfo("_col1", TypeInfoFactory.stringTypeInfo, "", false)); + + Map colExprMap = new HashMap<>(); + colExprMap.put("_col1", new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "_col1", "", false)); + + final Statistics[] capturedStats = new Statistics[1]; + LateralViewJoinOperator lvj = createMockLVJOperator( + selectParent, udtfParent, lvjSignature, colExprMap, 0, capturedStats); + + AnnotateStatsProcCtx ctx = createAnnotateStatsProcCtx(); + StatsRulesProcFactory.LateralViewJoinStatsRule rule = + new StatsRulesProcFactory.LateralViewJoinStatsRule(); + rule.process(lvj, new Stack<>(), ctx); + + assertNotNull("Statistics should be set", capturedStats[0]); + assertEquals("UDTF column should have NDV=10", 10, + capturedStats[0].getColumnStatisticsFromColName("_col1").getCountDistint()); + } + + private Operator createMockParentOperator( + String colName, long ndv, long numRows) { + return createMockParentOperatorMultiCol(Arrays.asList(colName), Arrays.asList(ndv), numRows); + } + + private Operator createMockParentOperatorMultiCol( + List colNames, List ndvs, long numRows) { + @SuppressWarnings("unchecked") + Operator parent = mock(Operator.class); + + Statistics stats = new Statistics(numRows, numRows * 10, 0, 0); + List colStatsList = new ArrayList<>(); + List signature = new ArrayList<>(); + + for (int i = 0; i < colNames.size(); i++) { + String colName = colNames.get(i); + ColStatistics colStats = new ColStatistics(colName, "string"); + colStats.setCountDistint(ndvs.get(i)); + colStats.setNumNulls(0); + colStatsList.add(colStats); + signature.add(new ColumnInfo(colName, TypeInfoFactory.stringTypeInfo, "", false)); + } + + stats.addToColumnStats(colStatsList); + 
stats.setColumnStatsState(Statistics.State.COMPLETE); + when(parent.getStatistics()).thenReturn(stats); + when(parent.getSchema()).thenReturn(new RowSchema(signature)); + + return parent; + } + + @SuppressWarnings("unchecked") + private LateralViewJoinOperator createMockLVJOperator( + Operator selectParent, + Operator udtfParent, + List signature, + Map colExprMap, + int numSelColumns, + Statistics[] capturedStats) { + + LateralViewJoinOperator lvj = mock(LateralViewJoinOperator.class); + + // Parent operators + List> parents = new ArrayList<>(); + parents.add(selectParent); + parents.add(udtfParent); + when(lvj.getParentOperators()).thenReturn(parents); + + // Schema + when(lvj.getSchema()).thenReturn(new RowSchema(signature)); + + // Column expression map + when(lvj.getColumnExprMap()).thenReturn(colExprMap); + + // LVJ descriptor with numSelColumns + List outputColNames = new ArrayList<>(); + for (ColumnInfo ci : signature) { + outputColNames.add(ci.getInternalName()); + } + LateralViewJoinDesc desc = new LateralViewJoinDesc(numSelColumns, outputColNames); + when(lvj.getConf()).thenReturn(desc); + + // Capture setStatistics call + doAnswer((Answer) invocation -> { + capturedStats[0] = invocation.getArgument(0); + return null; + }).when(lvj).setStatistics(any(Statistics.class)); + + return lvj; + } + + private AnnotateStatsProcCtx createAnnotateStatsProcCtx() { + HiveConf conf = new HiveConf(); + // Disable runtime stats to avoid NPE in applyRuntimeStats + conf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_REEXECUTION_ENABLED, false); + + Context context = mock(Context.class); + when(context.getConf()).thenReturn(conf); + + ParseContext pctx = mock(ParseContext.class); + when(pctx.getConf()).thenReturn(conf); + when(pctx.getContext()).thenReturn(context); + + return new AnnotateStatsProcCtx(pctx); + } +} diff --git a/ql/src/test/results/clientpositive/llap/lateral_view_noalias.q.out b/ql/src/test/results/clientpositive/llap/lateral_view_noalias.q.out index 
fc2801bcd898..a06607e78bf0 100644 --- a/ql/src/test/results/clientpositive/llap/lateral_view_noalias.q.out +++ b/ql/src/test/results/clientpositive/llap/lateral_view_noalias.q.out @@ -294,3 +294,5 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@lv_noalias POSTHOOK: Input: default@src #### A masked pattern was here #### +key1 100 key1 100 +key2 200 key2 200 diff --git a/ql/src/test/results/clientpositive/llap/udtf_explode.q.out b/ql/src/test/results/clientpositive/llap/udtf_explode.q.out index f1c47f4452ae..94d3cd930a48 100644 --- a/ql/src/test/results/clientpositive/llap/udtf_explode.q.out +++ b/ql/src/test/results/clientpositive/llap/udtf_explode.q.out @@ -240,6 +240,8 @@ POSTHOOK: query: SELECT a.myCol, count(1) FROM (SELECT explode(array(1, 2, 3)) A POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +1 1 +2 1 3 1 PREHOOK: query: EXPLAIN SELECT explode(map(1, 'one', 2, 'two', 3, 'three')) as (myKey, myVal) FROM src ORDER BY myKey, myVal LIMIT 3 PREHOOK: type: QUERY @@ -520,6 +522,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### 1 one 1 +2 two 1 +3 three 1 PREHOOK: query: SELECT src.key, myCol FROM src lateral view explode(array(1, 2, 3)) x AS myCol ORDER BY src.key, myCol LIMIT 3 PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out index 2b3c63670fb4..b2a7a7b08056 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -5579,7 +5579,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 3, 1, 2] - Statistics: Num rows: 26 Data size: 6890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition 
@@ -5613,7 +5613,7 @@ STAGE PLANS: outputTypes: [bigint, string, string, int, int] partitionExpressions: [col 0:string] streamingColumns: [] - Statistics: Num rows: 26 Data size: 6890 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -5621,13 +5621,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 3, 2, 1, 4] - Statistics: Num rows: 26 Data size: 7098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 26 Data size: 7098 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out index 1a1c7fabbf1a..e5f18e0a7da4 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union_udtf.q.out @@ -59,13 +59,13 @@ Stage-3 Reduce Output Operator [RS_56] Group By Operator [GBY_55] (rows=1 width=400) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(1)","count(col1)","compute_bit_vector_hll(col1)","min(col2)","max(col2)","count(col2)","compute_bit_vector_hll(col2)"] - Select Operator [SEL_54] (rows=4 width=136) + Select Operator [SEL_54] 
 (rows=4 width=91) Output:["col1","col2"] Please refer to the previous Select Operator [SEL_52] <-Map 4 [CONTAINS] File Output Operator [FS_45] table:{"name:":"default.x"} - Select Operator [SEL_43] (rows=2 width=132) + Select Operator [SEL_43] (rows=2 width=86) Output:["_col0","_col1"] Lateral View Join Operator [LVJ_41] (rows=2 width=134) Output:["_col0","_col6"] @@ -81,12 +81,12 @@ Stage-3 Reduce Output Operator [RS_48] Group By Operator [GBY_47] (rows=1 width=400) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(1)","count(col1)","compute_bit_vector_hll(col1)","min(col2)","max(col2)","count(col2)","compute_bit_vector_hll(col2)"] - Select Operator [SEL_46] (rows=4 width=136) + Select Operator [SEL_46] (rows=4 width=91) Output:["col1","col2"] Please refer to the previous Select Operator [SEL_43] File Output Operator [FS_45] table:{"name:":"default.x"} - Select Operator [SEL_43] (rows=2 width=132) + Select Operator [SEL_43] (rows=2 width=86) Output:["_col0","_col1"] Lateral View Join Operator [LVJ_41] (rows=2 width=134) Output:["_col0","_col6"] @@ -98,7 +98,7 @@ Stage-3 Reduce Output Operator [RS_48] Group By Operator [GBY_47] (rows=1 width=400) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["max(length(col1))","avg(COALESCE(length(col1),0))","count(1)","count(col1)","compute_bit_vector_hll(col1)","min(col2)","max(col2)","count(col2)","compute_bit_vector_hll(col2)"] - Select Operator [SEL_46] (rows=4 width=136) + Select Operator [SEL_46] (rows=4 width=91) Output:["col1","col2"] Please refer to the previous Select Operator [SEL_43] Stage-4(CONDITIONAL) From 7f48c9d03f671cae356678793a711f5352b41734 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Mon, 23 Feb 2026 13:03:06 -0800 Subject: [PATCH 3/3] HIVE-29473: further code optimizations + bug-specific test file --- 
.../annotation/StatsRulesProcFactory.java | 15 +- .../clientpositive/lvj_stats_isolation.q | 34 ++ .../llap/lvj_stats_isolation.q.out | 360 ++++++++++++++++++ 3 files changed, 405 insertions(+), 4 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/lvj_stats_isolation.q create mode 100644 ql/src/test/results/clientpositive/llap/lvj_stats_isolation.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 26a719489afb..8960d5e40723 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -3057,13 +3057,20 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, RowSchema udtfSchema = new RowSchema(new ArrayList<>(signature.subList(numSelColumns, signature.size()))); // Filter expression maps to avoid cross-contamination in getColStatisticsFromExprMap - Map selectExprMap = new HashMap<>(); - Map udtfExprMap = new HashMap<>(); + Map selectExprMap = Maps.newHashMapWithExpectedSize(numSelColumns); + Map udtfExprMap = Maps.newHashMapWithExpectedSize(signature.size() - numSelColumns); for (int i = 0; i < signature.size(); i++) { String name = signature.get(i).getInternalName(); ExprNodeDesc expr = columnExprMap.get(name); - if (expr != null) { - (i < numSelColumns ? 
selectExprMap : udtfExprMap).put(name, expr); + + if (expr == null) { + continue; + } + + if (i < numSelColumns) { + selectExprMap.put(name, expr); + } else { + udtfExprMap.put(name, expr); } } diff --git a/ql/src/test/queries/clientpositive/lvj_stats_isolation.q b/ql/src/test/queries/clientpositive/lvj_stats_isolation.q new file mode 100644 index 000000000000..73c812c0f49f --- /dev/null +++ b/ql/src/test/queries/clientpositive/lvj_stats_isolation.q @@ -0,0 +1,34 @@ +create table lvj_stats (id string, f1 string); + +insert into lvj_stats values + ('a','v1'), ('a','v2'), ('a','v3'), + ('b','v4'), ('b','v5'), ('b','v6'); + +analyze table lvj_stats compute statistics; +analyze table lvj_stats compute statistics for columns; + +-- Test that LV columns' stats no longer inflate SELECT columns' sizes +explain +select id, f1, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id, f1; + +select id, f1, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id, f1; + +-- Test that LV columns' stats no longer override NDV of a base column +alter table lvj_stats update statistics for column id set('numDVs'='0','numNulls'='0'); + +explain +select id, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id; + +select id, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id; diff --git a/ql/src/test/results/clientpositive/llap/lvj_stats_isolation.q.out b/ql/src/test/results/clientpositive/llap/lvj_stats_isolation.q.out new file mode 100644 index 000000000000..bcd6f8e2cc52 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/lvj_stats_isolation.q.out @@ -0,0 +1,360 @@ +PREHOOK: query: create table lvj_stats (id string, f1 string) +PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default +PREHOOK: Output: default@lvj_stats +POSTHOOK: query: create table lvj_stats (id string, f1 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@lvj_stats +PREHOOK: query: insert into lvj_stats values + ('a','v1'), ('a','v2'), ('a','v3'), + ('b','v4'), ('b','v5'), ('b','v6') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@lvj_stats +POSTHOOK: query: insert into lvj_stats values + ('a','v1'), ('a','v2'), ('a','v3'), + ('b','v4'), ('b','v5'), ('b','v6') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@lvj_stats +POSTHOOK: Lineage: lvj_stats.f1 SCRIPT [] +POSTHOOK: Lineage: lvj_stats.id SCRIPT [] +PREHOOK: query: analyze table lvj_stats compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@lvj_stats +PREHOOK: Output: default@lvj_stats +POSTHOOK: query: analyze table lvj_stats compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lvj_stats +POSTHOOK: Output: default@lvj_stats +PREHOOK: query: analyze table lvj_stats compute statistics for columns +PREHOOK: type: ANALYZE_TABLE +PREHOOK: Input: default@lvj_stats +PREHOOK: Output: default@lvj_stats +#### A masked pattern was here #### +POSTHOOK: query: analyze table lvj_stats compute statistics for columns +POSTHOOK: type: ANALYZE_TABLE +POSTHOOK: Input: default@lvj_stats +POSTHOOK: Output: default@lvj_stats +#### A masked pattern was here #### +PREHOOK: query: explain +select id, f1, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id, f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@lvj_stats +#### A masked pattern was here #### +POSTHOOK: query: explain +select id, f1, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id, f1 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@lvj_stats +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lvj_stats + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: string), f1 (type: string) + outputColumnNames: id, f1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: id (type: string), f1 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Forward + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 12546 Basic stats: COMPLETE 
Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 12546 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1074 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1074 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Select Operator + expressions: array(_col1,_col1) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 11520 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 6 Data size: 11520 Basic stats: COMPLETE Column stats: COMPLETE + function name: posexplode + Lateral View Join Operator + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 6 Data size: 12546 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 12546 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1074 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1074 
Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 1074 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 1074 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id, f1, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id, f1 +PREHOOK: type: QUERY +PREHOOK: Input: default@lvj_stats +#### A masked pattern was here #### +POSTHOOK: query: select id, f1, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id, f1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lvj_stats +#### A masked pattern was here #### +a v1 2 +a v2 2 +b v6 2 +a v3 2 +b v4 2 +b v5 2 +PREHOOK: query: alter table lvj_stats update statistics for column id set('numDVs'='0','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@lvj_stats +PREHOOK: Output: default@lvj_stats +POSTHOOK: query: alter table lvj_stats update statistics for column id set('numDVs'='0','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@lvj_stats +POSTHOOK: Output: default@lvj_stats +PREHOOK: query: explain +select id, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view 
posexplode(array(f1, f1)) t1 as pos1, val1 +group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@lvj_stats +#### A masked pattern was here #### +POSTHOOK: query: explain +select id, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lvj_stats +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: lvj_stats + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: id (type: string), f1 (type: string) + outputColumnNames: id, f1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: id (type: string), f1 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Forward + Statistics: Num rows: 6 Data size: 1026 Basic stats: COMPLETE Column stats: 
COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE + Lateral View Join Operator + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 6 Data size: 12030 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 12030 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Select Operator + expressions: array(_col1,_col1) (type: array) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 11520 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 6 Data size: 11520 Basic stats: COMPLETE Column stats: COMPLETE + function name: posexplode + Lateral View Join Operator + outputColumnNames: _col0, _col2, _col3 + Statistics: Num rows: 6 Data size: 12030 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 12030 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + 
Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select id, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id +PREHOOK: type: QUERY +PREHOOK: Input: default@lvj_stats +#### A masked pattern was here #### +POSTHOOK: query: select id, count(*) +from (select id, f1 from lvj_stats group by id, f1) sub +lateral view posexplode(array(f1, f1)) t1 as pos1, val1 +group by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lvj_stats +#### A masked pattern was here #### +a 6 +b 6