From 633951cc4c2294f33ecd3ce115932818b80b652e Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 18 Dec 2025 10:48:52 -0800 Subject: [PATCH 01/14] HIVE-29368: more conservative NDV combining by PessimisticStatCombiner --- .../estimator/PessimisticStatCombiner.java | 4 +- .../TestPessimisticStatCombiner.java | 164 ++++++ .../pessimistic_stat_combiner_ndv.q | 77 +++ .../llap/pessimistic_stat_combiner_ndv.q.out | 495 ++++++++++++++++++ .../llap/vector_identity_reuse.q.out | 22 +- 5 files changed, 748 insertions(+), 14 deletions(-) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java create mode 100644 ql/src/test/queries/clientpositive/pessimistic_stat_combiner_ndv.q create mode 100644 ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java index dde2019eadf7..ce9136b0aec2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java @@ -41,9 +41,7 @@ public void add(ColStatistics stat) { if (stat.getAvgColLen() > result.getAvgColLen()) { result.setAvgColLen(stat.getAvgColLen()); } - if (stat.getCountDistint() > result.getCountDistint()) { - result.setCountDistint(stat.getCountDistint()); - } + result.setCountDistint(0L); if (stat.getNumNulls() > result.getNumNulls()) { result.setNumNulls(stat.getNumNulls()); } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java new file mode 100644 index 000000000000..b07fcec0f522 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java @@ -0,0 +1,164 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.stats.estimator; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Optional; + +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; +import org.junit.jupiter.api.Test; + +class TestPessimisticStatCombiner { + + @Test + void testSingleStatPreservesNdv() { + ColStatistics stat = createStat("col1", "int", 100, 10, 5.0); + stat.setRange(new Range(0, 100)); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat); + + Optional result = combiner.getResult(); + assertTrue(result.isPresent()); + ColStatistics combined = result.get(); + + assertEquals("col1", combined.getColumnName()); + assertEquals("int", combined.getColumnType()); + assertEquals(100, combined.getCountDistint()); + assertEquals(10, combined.getNumNulls()); + assertEquals(5.0, combined.getAvgColLen()); + assertNull(combined.getRange()); + assertTrue(combined.isEstimated()); + } + + @Test + void testCombineTakesMaxOfAvgColLen() { + 
ColStatistics stat1 = createStat("col1", "string", 50, 5, 10.0); + ColStatistics stat2 = createStat("col2", "string", 30, 3, 20.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(20.0, combined.getAvgColLen()); + } + + @Test + void testCombineTakesMaxOfNumNulls() { + ColStatistics stat1 = createStat("col1", "int", 50, 100, 4.0); + ColStatistics stat2 = createStat("col2", "int", 30, 200, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(200, combined.getNumNulls()); + } + + @Test + void testCombineSetsCountDistinctToZero() { + ColStatistics stat1 = createStat("col1", "int", 100, 10, 4.0); + ColStatistics stat2 = createStat("col2", "int", 200, 20, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(0, combined.getCountDistint()); + } + + @Test + void testCombineTakesMaxOfNumTruesAndNumFalses() { + ColStatistics stat1 = createStat("col1", "boolean", 2, 5, 1.0); + stat1.setNumTrues(100); + stat1.setNumFalses(50); + + ColStatistics stat2 = createStat("col2", "boolean", 2, 10, 1.0); + stat2.setNumTrues(50); + stat2.setNumFalses(150); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(100, combined.getNumTrues()); + assertEquals(150, combined.getNumFalses()); + } + + @Test + void testCombinePropagatesFilteredColumnFlag() { + ColStatistics stat1 = createStat("col1", "int", 50, 5, 4.0); + ColStatistics stat2 = createStat("col2", "int", 30, 3, 4.0); + stat2.setFilterColumn(); + + PessimisticStatCombiner combiner = new 
PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertTrue(combined.isFilteredColumn()); + } + + @Test + void testCombineMultipleStats() { + ColStatistics stat1 = createStat("col1", "bigint", 1000, 50, 8.0); + ColStatistics stat2 = createStat("col2", "bigint", 500, 100, 8.0); + ColStatistics stat3 = createStat("col3", "bigint", 2000, 25, 8.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + combiner.add(stat3); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(0, combined.getCountDistint()); + assertEquals(100, combined.getNumNulls()); + assertEquals(8.0, combined.getAvgColLen()); + } + + @Test + void testCombineSameColumnTwice() { + ColStatistics stat = createStat("col1", "int", 100, 10, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat); + combiner.add(stat); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(0, combined.getCountDistint()); + assertEquals(10, combined.getNumNulls()); + assertEquals(4.0, combined.getAvgColLen()); + } + + private ColStatistics createStat(String name, String type, long ndv, long numNulls, double avgColLen) { + ColStatistics stat = new ColStatistics(name, type); + stat.setCountDistint(ndv); + stat.setNumNulls(numNulls); + stat.setAvgColLen(avgColLen); + return stat; + } +} diff --git a/ql/src/test/queries/clientpositive/pessimistic_stat_combiner_ndv.q b/ql/src/test/queries/clientpositive/pessimistic_stat_combiner_ndv.q new file mode 100644 index 000000000000..dc3cc690c977 --- /dev/null +++ b/ql/src/test/queries/clientpositive/pessimistic_stat_combiner_ndv.q @@ -0,0 +1,77 @@ +CREATE TABLE t1 (cat INT, val BIGINT, data STRING); +ALTER TABLE t1 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000'); +ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN cat SET('numDVs'='100','numNulls'='0'); + 
+-- Test 1: IF should result in NDV of 2 +EXPLAIN +SELECT x, COUNT(*) +FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub +GROUP BY x; + +-- Test 2: CASE WHEN should result in NDV of 3 +EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE WHEN cat < 30 THEN 'X' WHEN cat < 60 THEN 'Y' ELSE 'Z' END x + FROM t1 +) sub +GROUP BY x; + +-- Test 3: CASE col WHEN val should result in NDV of 4 +EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE cat WHEN 1 THEN 'A' WHEN 2 THEN 'B' WHEN 3 THEN 'C' ELSE 'D' END x + FROM t1 +) sub +GROUP BY x; + +-- Test 4: MapJoin NO longer chosen due to NDV=1 causing tiny size estimate +CREATE TABLE t2 (key STRING, v1 STRING); + +ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN val SET('numDVs'='1000000','numNulls'='0'); +ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='5000000','numNulls'='0','avgColLen'='500.0','maxColLen'='600'); +ALTER TABLE t2 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000'); +ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN key SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100'); +ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN v1 SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100'); +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask.size=1000; + +EXPLAIN +SELECT a.k, a.total, a.sample, b.v1 +FROM ( + SELECT + k, + SUM(val) as total, + MAX(data) as sample + FROM ( + SELECT + CASE + WHEN cat BETWEEN 0 AND 4 THEN 'K00' + WHEN cat BETWEEN 5 AND 9 THEN 'K01' + WHEN cat BETWEEN 10 AND 14 THEN 'K02' + WHEN cat BETWEEN 15 AND 19 THEN 'K03' + WHEN cat BETWEEN 20 AND 24 THEN 'K04' + WHEN cat BETWEEN 25 AND 29 THEN 'K05' + WHEN cat BETWEEN 30 AND 34 THEN 'K06' + WHEN cat BETWEEN 35 AND 39 THEN 'K07' + WHEN cat BETWEEN 40 AND 44 THEN 'K08' + WHEN cat BETWEEN 45 AND 49 THEN 'K09' + WHEN cat BETWEEN 50 AND 54 THEN 'K10' + WHEN cat BETWEEN 55 AND 59 THEN 'K11' + WHEN cat BETWEEN 60 AND 64 THEN 'K12' + WHEN cat BETWEEN 65 AND 69 THEN 'K13' + 
WHEN cat BETWEEN 70 AND 74 THEN 'K14' + WHEN cat BETWEEN 75 AND 79 THEN 'K15' + WHEN cat BETWEEN 80 AND 84 THEN 'K16' + WHEN cat BETWEEN 85 AND 89 THEN 'K17' + WHEN cat BETWEEN 90 AND 94 THEN 'K18' + ELSE 'K19' + END as k, + val, + data + FROM t1 + ) s + GROUP BY k +) a +JOIN t2 b ON a.k = b.key; diff --git a/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out b/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out new file mode 100644 index 000000000000..b23255417f92 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out @@ -0,0 +1,495 @@ +PREHOOK: query: CREATE TABLE t1 (cat INT, val BIGINT, data STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: CREATE TABLE t1 (cat INT, val BIGINT, data STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: ALTER TABLE t1 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: ALTER TABLE t1 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN cat SET('numDVs'='100','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN cat SET('numDVs'='100','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub +GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN 
+SELECT x, COUNT(*) +FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub +GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cat > 50), 'A', 'B') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE WHEN cat < 30 THEN 'X' WHEN cat < 60 THEN 'Y' ELSE 'Z' END x + FROM t1 +) sub +GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE WHEN cat < 30 THEN 'X' WHEN cat < 60 THEN 'Y' ELSE 'Z' END x + FROM t1 +) sub +GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cat < 30)) THEN ('X') WHEN ((cat < 60)) THEN ('Y') ELSE ('Z') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + 
keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE cat WHEN 1 THEN 'A' WHEN 2 THEN 'B' WHEN 3 THEN 'C' ELSE 'D' END x + FROM t1 +) sub +GROUP BY x +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT x, COUNT(*) +FROM ( + SELECT CASE cat WHEN 1 THEN 'A' WHEN 2 THEN 'B' WHEN 3 THEN 'C' ELSE 'D' END x + FROM t1 +) sub +GROUP BY x +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cat = 1)) THEN ('A') WHEN ((cat = 2)) THEN ('B') WHEN ((cat = 3)) THEN ('C') ELSE ('D') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000000 Data size: 4000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500000 Data size: 46500000 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: CREATE TABLE t2 (key STRING, v1 STRING) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: CREATE TABLE t2 (key STRING, v1 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN val SET('numDVs'='1000000','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN val SET('numDVs'='1000000','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='5000000','numNulls'='0','avgColLen'='500.0','maxColLen'='600') +PREHOOK: 
type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t1 +POSTHOOK: query: ALTER TABLE t1 UPDATE STATISTICS FOR COLUMN data SET('numDVs'='5000000','numNulls'='0','avgColLen'='500.0','maxColLen'='600') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@t1 +PREHOOK: query: ALTER TABLE t2 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 UPDATE STATISTICS SET('numRows'='1000000','rawDataSize'='100000000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN key SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN key SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN v1 SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t2 +PREHOOK: Output: default@t2 +POSTHOOK: query: ALTER TABLE t2 UPDATE STATISTICS FOR COLUMN v1 SET('numDVs'='1000000','numNulls'='0','avgColLen'='50.0','maxColLen'='100') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t2 +POSTHOOK: Output: default@t2 +PREHOOK: query: EXPLAIN +SELECT a.k, a.total, a.sample, b.v1 +FROM ( + SELECT + k, + SUM(val) as total, + MAX(data) as sample + FROM ( + SELECT + CASE + WHEN cat BETWEEN 0 AND 4 THEN 'K00' + WHEN cat BETWEEN 5 AND 9 THEN 'K01' + WHEN cat BETWEEN 10 AND 14 THEN 'K02' + WHEN cat 
BETWEEN 15 AND 19 THEN 'K03' + WHEN cat BETWEEN 20 AND 24 THEN 'K04' + WHEN cat BETWEEN 25 AND 29 THEN 'K05' + WHEN cat BETWEEN 30 AND 34 THEN 'K06' + WHEN cat BETWEEN 35 AND 39 THEN 'K07' + WHEN cat BETWEEN 40 AND 44 THEN 'K08' + WHEN cat BETWEEN 45 AND 49 THEN 'K09' + WHEN cat BETWEEN 50 AND 54 THEN 'K10' + WHEN cat BETWEEN 55 AND 59 THEN 'K11' + WHEN cat BETWEEN 60 AND 64 THEN 'K12' + WHEN cat BETWEEN 65 AND 69 THEN 'K13' + WHEN cat BETWEEN 70 AND 74 THEN 'K14' + WHEN cat BETWEEN 75 AND 79 THEN 'K15' + WHEN cat BETWEEN 80 AND 84 THEN 'K16' + WHEN cat BETWEEN 85 AND 89 THEN 'K17' + WHEN cat BETWEEN 90 AND 94 THEN 'K18' + ELSE 'K19' + END as k, + val, + data + FROM t1 + ) s + GROUP BY k +) a +JOIN t2 b ON a.k = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT a.k, a.total, a.sample, b.v1 +FROM ( + SELECT + k, + SUM(val) as total, + MAX(data) as sample + FROM ( + SELECT + CASE + WHEN cat BETWEEN 0 AND 4 THEN 'K00' + WHEN cat BETWEEN 5 AND 9 THEN 'K01' + WHEN cat BETWEEN 10 AND 14 THEN 'K02' + WHEN cat BETWEEN 15 AND 19 THEN 'K03' + WHEN cat BETWEEN 20 AND 24 THEN 'K04' + WHEN cat BETWEEN 25 AND 29 THEN 'K05' + WHEN cat BETWEEN 30 AND 34 THEN 'K06' + WHEN cat BETWEEN 35 AND 39 THEN 'K07' + WHEN cat BETWEEN 40 AND 44 THEN 'K08' + WHEN cat BETWEEN 45 AND 49 THEN 'K09' + WHEN cat BETWEEN 50 AND 54 THEN 'K10' + WHEN cat BETWEEN 55 AND 59 THEN 'K11' + WHEN cat BETWEEN 60 AND 64 THEN 'K12' + WHEN cat BETWEEN 65 AND 69 THEN 'K13' + WHEN cat BETWEEN 70 AND 74 THEN 'K14' + WHEN cat BETWEEN 75 AND 79 THEN 'K15' + WHEN cat BETWEEN 80 AND 84 THEN 'K16' + WHEN cat BETWEEN 85 AND 89 THEN 'K17' + WHEN cat BETWEEN 90 AND 94 THEN 'K18' + ELSE 'K19' + END as k, + val, + data + FROM t1 + ) s + GROUP BY k +) a +JOIN t2 b ON a.k = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 
is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 1000000 Data size: 596000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN (cat BETWEEN 0 AND 4) THEN ('K00') WHEN (cat BETWEEN 5 AND 9) THEN ('K01') WHEN (cat BETWEEN 10 AND 14) THEN ('K02') WHEN (cat BETWEEN 15 AND 19) THEN ('K03') WHEN (cat BETWEEN 20 AND 24) THEN ('K04') WHEN (cat BETWEEN 25 AND 29) THEN ('K05') WHEN (cat BETWEEN 30 AND 34) THEN ('K06') WHEN (cat BETWEEN 35 AND 39) THEN ('K07') WHEN (cat BETWEEN 40 AND 44) THEN ('K08') WHEN (cat BETWEEN 45 AND 49) THEN ('K09') WHEN (cat BETWEEN 50 AND 54) THEN ('K10') WHEN (cat BETWEEN 55 AND 59) THEN ('K11') WHEN (cat BETWEEN 60 AND 64) THEN ('K12') WHEN (cat BETWEEN 65 AND 69) THEN ('K13') WHEN (cat BETWEEN 70 AND 74) THEN ('K14') WHEN (cat BETWEEN 75 AND 79) THEN ('K15') WHEN (cat BETWEEN 80 AND 84) THEN ('K16') WHEN (cat BETWEEN 85 AND 89) THEN ('K17') WHEN (cat BETWEEN 90 AND 94) THEN ('K18') ELSE ('K19') END (type: string), val (type: bigint), data (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000000 Data size: 596000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), max(_col2) + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500000 Data size: 139500000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500000 Data size: 139500000 Basic stats: COMPLETE Column stats: COMPLETE + 
value expressions: _col1 (type: bigint), _col2 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 1000000 Data size: 268000000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000000 Data size: 268000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), v1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000000 Data size: 268000000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000000 Data size: 268000000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), max(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250000 Data size: 69750000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250000 Data size: 69750000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE 
+ Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out index 273a92b28dec..58aa422777d5 100644 --- a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out @@ -198,14 +198,14 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col5 input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:int, val 5), FilterLongColEqualLongScalar(col 7:int, val 10), FilterLongColEqualLongScalar(col 7:bigint, val 571)(children: col 7:int)) predicate: ((_col1 = 5) or (_col5 = 10) or (UDFToLong(_col5) = 571L)) (type: boolean) - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col5 (type: int), if(_col3 is not null, _col3, UDFToInteger(_col4)) (type: int) 
outputColumnNames: _col0, _col1, _col2 @@ -214,7 +214,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 7, 9] selectExpressions: IfExprColumnCondExpr(col 8:boolean, col 5:intcol 6:smallint)(children: IsNotNull(col 5:int) -> 8:boolean, col 5:int, col 6:smallint) -> 9:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -233,12 +233,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 4 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: - keys: _col0 (type: int) null sort order: z - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -251,7 +251,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5, 9, 7] - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -262,7 +262,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 9:int, 7:int - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int) 
Execution mode: vectorized, llap LLAP IO: all inputs @@ -406,13 +406,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), 922 (type: int), _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -421,13 +421,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 1, 2] selectExpressions: ConstantVectorExpression(val 922) -> 3:int - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat From 199c441dee5bf0dcd9918f3c2bfe0407e6417518 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 18 Dec 2025 17:19:37 -0800 Subject: [PATCH 02/14] HIVE-29368: regenerated impacted test results + added an explanation comment --- .../estimator/PessimisticStatCombiner.java | 8 ++++++++ .../llap/infer_bucket_sort_dyn_part.q.out | 10 +++++----- .../clientpositive/llap/innerjoin1.q.out | 12 +++++------ .../llap/list_bucket_dml_6.q.out | 20 +++++++++---------- .../llap/list_bucket_dml_7.q.out 
| 20 +++++++++---------- .../llap/list_bucket_dml_8.q.out | 10 +++++----- .../llap/merge_dynamic_partition4.q.out | 10 +++++----- .../llap/merge_dynamic_partition5.q.out | 10 +++++----- 8 files changed, 54 insertions(+), 46 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java index ce9136b0aec2..7b61bc460158 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java @@ -41,7 +41,15 @@ public void add(ColStatistics stat) { if (stat.getAvgColLen() > result.getAvgColLen()) { result.setAvgColLen(stat.getAvgColLen()); } + + // NDVs can only be accurately combined if full information about columns, query branches and + // their relationships is available. Without that info, there is only one "truly conservative" + // value of NDV which is 0, which means that the NDV is unknown. It forces the optimizer + // to make the most conservative decisions possible, which is the exact goal of + // PessimisticStatCombiner.
It does inflate statistics in multiple cases, but at the same time it + // also ensures that the query execution does not "blow up" due to too optimistic stats estimates result.setCountDistint(0L); + if (stat.getNumNulls() > result.getNumNulls()) { result.setNumNulls(stat.getNumNulls()); } diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out index 995733564a08..b07fc4ca6103 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out @@ -492,13 +492,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 158 Data size: 103016 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 158 Data size: 103016 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: vectorized, llap @@ -508,14 +508,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 79 Data size: 40764 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 79 Data size: 56248 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 79 Data size: 56248 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/innerjoin1.q.out b/ql/src/test/results/clientpositive/llap/innerjoin1.q.out index 8e9dbf9b583d..075e9e8985ba 100644 --- a/ql/src/test/results/clientpositive/llap/innerjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/innerjoin1.q.out @@ -142,14 +142,14 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,14 +249,14 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out index dbcf49b202e7..a312142af7c1 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 
compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -461,7 +461,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,7 +469,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -564,18 +564,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: 
binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out index ad7051398156..9a960de8085f 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -461,7 +461,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,7 +469,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -564,18 +564,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out index 148303926d66..979ef4f18835 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out index 2c9c9015c173..13b1ace4f633 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out @@ -180,13 +180,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - 
Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs @@ -198,14 +198,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output 
Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out index 5b1e537b938a..e27223e6f3d4 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out @@ -156,13 +156,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 201468 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 201468 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs @@ -174,14 +174,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 154 Data size: 
79464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 154 Data size: 109648 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 154 Data size: 109648 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat From f0022f71dc5b1a20df60f25389b6c4a0aa694a65 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 18 Dec 2025 17:22:06 -0800 Subject: [PATCH 03/14] HIVE-29368: one more test file, modified using explain output only for now --- .../clientpositive/llap/subquery_notin.q.out | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index fa79cd87dd13..b86c12d0ea82 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -1533,19 +1533,19 @@ STAGE PLANS: Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: 
COMPLETE Group By Operator keys: _col0 (type: string) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 105 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 105 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Reducer 2 + Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1555,42 +1555,42 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 166 Data size: 15106 Basic stats: COMPLETE Column stats: COMPLETE + null sort order: + sort order: + Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: boolean) - Reducer 3 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 + 0 + 1 outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: bigint), _col4 (type: bigint), _col2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 166 Data size: 17762 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0L) or (_col4 is null and (_col2 >= _col1) and _col0 is not null)) (type: boolean) - Statistics: Num rows: 166 Data size: 17762 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1599,28 +1599,28 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - null sort order: - sort order: + null sort order: + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 5 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 105 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), true (type: boolean) 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 105 Data size: 507 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 105 Data size: 507 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 @@ -1630,7 +1630,7 @@ STAGE PLANS: ListSink Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: select * +PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) PREHOOK: type: QUERY PREHOOK: Input: default@src From bd86e3c4debec826de636efdd10625da24fc2739 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 18 Dec 2025 23:05:33 -0800 Subject: [PATCH 04/14] HIVE-29368: only increment ndv by one inextractNDVGroupingColumns() if it is "known" --- .../hadoop/hive/ql/stats/StatsUtils.java | 3 ++- .../clientpositive/llap/subquery_notin.q.out | 24 +++++++++---------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index c530633fbf1c..d71f7f9077b4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -2087,7 +2087,8 @@ private static List extractNDVGroupingColumns(List colStats for (ColStatistics cs : colStats) { if (cs != null) { long ndv = cs.getCountDistint(); - if (cs.getNumNulls() > 0) { + // Only increment ndv value if it is "known" + if (ndv > 0 && cs.getNumNulls() > 0) { ndv = StatsUtils.safeAdd(ndv, 1); } ndvValues.add(ndv); diff --git 
a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index b86c12d0ea82..1de46e04de66 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -1536,13 +1536,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 83 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1555,11 +1555,11 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 230 Data size: 20270 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 230 Data size: 20270 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: boolean) Reducer 3 Execution mode: llap @@ -1571,21 +1571,21 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: bigint), _col4 (type: bigint), _col2 (type: boolean) outputColumnNames: _col0, _col1, _col2, 
_col4 - Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0L) or (_col4 is null and (_col2 >= _col1) and _col0 is not null)) (type: boolean) - Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 230 Data size: 20010 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 230 Data size: 20010 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1610,17 +1610,17 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 41 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 41 Data size: 251 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 41 Data size: 251 Basic stats: COMPLETE Column stats: COMPLETE value expressions: 
_col1 (type: boolean) Stage: Stage-0 From 75dbdf8d57c5cf4abaf6e307c2f88e1fdddf52c0 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Fri, 19 Dec 2025 13:07:37 -0800 Subject: [PATCH 05/14] HIVE-29368: further tuning NDV handling, including reading stats for timestamp/date columns --- .../hadoop/hive/ql/stats/StatsUtils.java | 15 +++- .../hadoop/hive/ql/stats/TestStatsUtils.java | 77 +++++++++++++++++++ .../llap/parquet_vectorization_15.q.out | 14 ++-- .../llap/parquet_vectorization_16.q.out | 10 +-- .../llap/parquet_vectorization_9.q.out | 10 +-- ...ecial_character_in_tabnames_quotes_1.q.out | 2 +- .../llap/vectorization_15.q.out | 14 ++-- .../llap/vectorization_16.q.out | 10 +-- .../clientpositive/llap/vectorization_9.q.out | 10 +-- .../llap/vectorization_short_regress.q.out | 24 +++--- .../llap/vectorized_stats.q.out | 18 ++--- 11 files changed, 145 insertions(+), 59 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index d71f7f9077b4..07d616602c90 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -832,6 +832,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col cs.setNumNulls(csd.getBinaryStats().getNumNulls()); } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); + cs.setCountDistint(csd.getTimestampStats().getNumDVs()); cs.setNumNulls(csd.getTimestampStats().getNumNulls()); Long lowVal = (csd.getTimestampStats().getLowValue() != null) ? 
csd.getTimestampStats().getLowValue() .getSecondsSinceEpoch() : null; @@ -862,6 +863,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col cs.setHistogram(csd.getDecimalStats().getHistogram()); } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfDate()); + cs.setCountDistint(csd.getDateStats().getNumDVs()); cs.setNumNulls(csd.getDateStats().getNumNulls()); Long lowVal = (csd.getDateStats().getLowValue() != null) ? csd.getDateStats().getLowValue() .getDaysSinceEpoch() : null; @@ -2087,9 +2089,16 @@ private static List extractNDVGroupingColumns(List colStats for (ColStatistics cs : colStats) { if (cs != null) { long ndv = cs.getCountDistint(); - // Only increment ndv value if it is "known" - if (ndv > 0 && cs.getNumNulls() > 0) { - ndv = StatsUtils.safeAdd(ndv, 1); + + if (ndv == 0L) { + // Typically, ndv == 0 means "NDV unknown", and no safe GROUPBY adjustments are possible + // However, there is a special exception for "constant NULL" columns. 
They are intentionally generated + // with NDV values of 0 and numNulls == numRows, while their actual NDV is 1 + if (cs.getNumNulls() >= parentStats.getNumRows()) { + ndv = 1L; + } + } else if (cs.getNumNulls() > 0L) { + ndv = StatsUtils.safeAdd(ndv, 1L); } ndvValues.add(ndv); } else { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java index 5701fc40581e..8a3dd1cea14e 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java @@ -25,6 +25,8 @@ import java.lang.reflect.Field; import java.lang.reflect.Modifier; +import java.util.Collections; +import java.util.List; import java.util.Set; import java.util.stream.Stream; @@ -32,10 +34,15 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Date; +import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Timestamp; +import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; +import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.serde.serdeConstants; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; @@ -244,4 +251,74 @@ static Stream floatingPointStatisticsTestData() { ); } + @Test + void testGetColStatisticsTimestampType() { + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + cso.setColName("ts_col"); + cso.setColType(serdeConstants.TIMESTAMP_TYPE_NAME); + + TimestampColumnStatsData tsStats = new 
TimestampColumnStatsData(); + tsStats.setNumDVs(35); + tsStats.setNumNulls(5); + tsStats.setLowValue(new Timestamp(1000)); + tsStats.setHighValue(new Timestamp(2000)); + + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setTimestampStats(tsStats); + cso.setStatsData(data); + + ColStatistics cs = StatsUtils.getColStatistics(cso, "ts_col"); + + assertNotNull(cs, "ColStatistics should not be null"); + assertEquals(35, cs.getCountDistint(), "TIMESTAMP NumDVs should be extracted from metastore stats"); + assertEquals(5, cs.getNumNulls(), "NumNulls mismatch"); + } + + @Test + void testGetColStatisticsDateType() { + ColumnStatisticsObj cso = new ColumnStatisticsObj(); + cso.setColName("date_col"); + cso.setColType(serdeConstants.DATE_TYPE_NAME); + + DateColumnStatsData dateStats = new DateColumnStatsData(); + dateStats.setNumDVs(42); + dateStats.setNumNulls(3); + dateStats.setLowValue(new Date(18000)); + dateStats.setHighValue(new Date(19000)); + + ColumnStatisticsData data = new ColumnStatisticsData(); + data.setDateStats(dateStats); + cso.setStatsData(data); + + ColStatistics cs = StatsUtils.getColStatistics(cso, "date_col"); + + assertNotNull(cs, "ColStatistics should not be null"); + assertEquals(42, cs.getCountDistint(), "DATE NumDVs should be extracted from metastore stats"); + assertEquals(3, cs.getNumNulls(), "NumNulls mismatch"); + } + + private ColStatistics createColStats(String name, long ndv, long numNulls) { + ColStatistics cs = new ColStatistics(name, "string"); + cs.setCountDistint(ndv); + cs.setNumNulls(numNulls); + return cs; + } + + private Statistics createParentStats(long numRows) { + Statistics stats = new Statistics(numRows, 0, 0, 0); + stats.setColumnStatsState(Statistics.State.COMPLETE); + return stats; + } + + @Test + void testComputeNDVGroupingColumnsPartialStats() { + ColStatistics cs = createColStats("partial_stats_col", 0, 100); + Statistics parentStats = createParentStats(1000); + List colStats = Collections.singletonList(cs); 
+ + long ndv = StatsUtils.computeNDVGroupingColumns(colStats, parentStats, false); + + assertEquals(0, ndv, "Partial stats (ndv=0, numNulls 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 13:double, col 16:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double) -> 13:double, IfExprNullCondExpr(col 14:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 14:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 20:double)(children: DoubleColDivideLongColumn(col 19:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 19:double) -> 20:double) -> 21:double) -> 22:double, IfExprNullCondExpr(col 23:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 26:double) -> 27:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 37:double, col 38:double)(children: FuncPowerDoubleToDouble(col 36:double)(children: DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 
30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double) -> 37:double, CastLongToDouble(col 3:bigint) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 40:double, DecimalColDivideDecimalScalar(col 41:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 41:decimal(19,0)) -> 42:decimal(28,6), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 49:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, IfExprNullCondExpr(col 47:boolean, null, col 48:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 47:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 48:bigint) -> 49:bigint) -> 50:double) -> 51:double - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out index eeab9c89af72..b3c24ec4c133 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_9.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) null sort order: zzz @@ -106,7 +106,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -141,7 +141,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: 
double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double), (- power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5)) (type: double), (power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -150,13 +150,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 18, 28, 39, 6, 40, 42, 51] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 13:double, col 16:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double) -> 13:double, IfExprNullCondExpr(col 14:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 14:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: 
DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 20:double)(children: DoubleColDivideLongColumn(col 19:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 19:double) -> 20:double) -> 21:double) -> 22:double, IfExprNullCondExpr(col 23:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 26:double) -> 27:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 37:double, col 38:double)(children: FuncPowerDoubleToDouble(col 36:double)(children: DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double) -> 37:double, CastLongToDouble(col 3:bigint) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 40:double, DecimalColDivideDecimalScalar(col 41:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 41:decimal(19,0)) -> 42:decimal(28,6), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 49:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 3:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, IfExprNullCondExpr(col 47:boolean, null, col 48:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 47:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 48:bigint) -> 49:bigint) -> 50:double) -> 51:double - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out index a2d955298324..e5c937ec4dd5 100644 --- a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out +++ b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out @@ -3837,7 +3837,7 @@ STAGE PLANS: Group By Operator aggregations: max(p_retailprice), min(p_retailprice) keys: null (type: string) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 6732aba7edd2..ee3b51fb93fc 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ 
b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -120,7 +120,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) null sort order: zzzzzzz @@ -132,7 +132,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:double, 13:bigint, 14:double, 15:double, 16:bigint - Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -163,16 +163,16 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 
Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2432638 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / if((_col9 = 1L), null, (_col9 - 1))), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / if((_col13 = 1L), null, (_col13 - 1))), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / if((_col16 = 1L), null, (_col16 - 1))) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) null sort order: zzzzzzz sort order: +++++++ - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 
(type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: llap @@ -184,10 +184,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5185150 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 7e8cb81144fc..670bf936bfbd 100644 --- 
a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -97,7 +97,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) null sort order: zzz @@ -109,7 +109,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 3:bigint, 4:double, 5:double, 6:double - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -157,7 +157,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double), (- 
power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5)) (type: double), (power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -166,13 +166,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 18, 28, 39, 6, 40, 42, 51] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 13:double, col 16:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double) -> 13:double, IfExprNullCondExpr(col 14:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 14:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 20:double)(children: DoubleColDivideLongColumn(col 19:double, col 3:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 19:double) -> 20:double) -> 21:double) -> 22:double, IfExprNullCondExpr(col 23:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 26:double) -> 27:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 37:double, col 38:double)(children: FuncPowerDoubleToDouble(col 36:double)(children: DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double) -> 37:double, CastLongToDouble(col 3:bigint) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 40:double, DecimalColDivideDecimalScalar(col 41:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 41:decimal(19,0)) -> 42:decimal(28,6), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 49:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, IfExprNullCondExpr(col 47:boolean, null, col 48:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 47:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 
48:bigint) -> 49:bigint) -> 50:double) -> 51:double - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out index 7e8cb81144fc..670bf936bfbd 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -97,7 +97,7 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) null sort order: zzz @@ -109,7 +109,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 3:bigint, 4:double, 5:double, 6:double - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 
(type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -157,7 +157,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 5979 Data size: 825318 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 848064 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double), (- power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5)) (type: double), (power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power((greatest(0,(_col4 - ((_col5 * _col5) / _col3))) / if((_col3 = 1L), null, (_col3 - 1))), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -166,13 +166,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 18, 28, 39, 6, 40, 42, 51] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 13:double, col 16:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: 
DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double) -> 13:double, IfExprNullCondExpr(col 14:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 14:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 20:double)(children: DoubleColDivideLongColumn(col 19:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 19:double) -> 20:double) -> 21:double) -> 22:double, IfExprNullCondExpr(col 23:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 26:double) -> 27:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 37:double, col 38:double)(children: FuncPowerDoubleToDouble(col 36:double)(children: DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double) -> 37:double, CastLongToDouble(col 3:bigint) -> 38:double) -> 39:double, 
DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 40:double, DecimalColDivideDecimalScalar(col 41:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 41:decimal(19,0)) -> 42:decimal(28,6), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 49:bigint)(children: VectorUDFAdaptor(greatest(0,(_col4 - ((_col5 * _col5) / _col3))))(children: DoubleColSubtractDoubleColumn(col 4:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, IfExprNullCondExpr(col 47:boolean, null, col 48:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 47:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 48:bigint) -> 49:bigint) -> 50:double) -> 51:double - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5979 Data size: 1734126 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1781952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index da82903d7963..418538ff44de 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -635,7 +635,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: ((ctimestamp1 = 
ctimestamp2) or ((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) + filterExpr: (((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or (ctimestamp1 = ctimestamp2) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -643,8 +643,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8:timestamp, col 9:timestamp), FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterLongColEqualLongScalar(col 11:boolean, val 1)), FilterExprAndExpr(children: FilterStringGroupColGreaterStringScalar(col 7:string, val a), SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 9:timestamp)), FilterDoubleColEqualDoubleScalar(col 4:float, val 762.0), FilterStringGroupColEqualStringScalar(col 6:string, val ss)) - predicate: ((ctimestamp1 = ctimestamp2) or ((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterLongColEqualLongScalar(col 11:boolean, val 1)), FilterTimestampColEqualTimestampColumn(col 8:timestamp, col 9:timestamp), FilterExprAndExpr(children: FilterStringGroupColGreaterStringScalar(col 7:string, val a), SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 9:timestamp)), FilterDoubleColEqualDoubleScalar(col 4:float, 
val 762.0), FilterStringGroupColEqualStringScalar(col 6:string, val ss)) + predicate: (((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or (ctimestamp1 = ctimestamp2) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) Statistics: Num rows: 10571 Data size: 2446670 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), (cdouble * cdouble) (type: double) @@ -2949,10 +2949,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] keys: _col0 (type: timestamp), _col1 (type: string) - minReductionHashAggr: 0.5133463 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3244642 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string) null sort order: zz @@ -2962,7 +2962,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3244642 Basic 
stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: tinyint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: bigint), _col16 (type: bigint), _col17 (type: double), _col18 (type: bigint), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2997,7 +2997,7 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 5980 Data size: 1579124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3244642 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: timestamp), _col1 (type: string), power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) (type: double), (UDFToDouble(_col5) / _col6) (type: double), _col7 (type: bigint), _col8 (type: tinyint), ((_col9 - ((_col10 * _col10) / _col11)) / if((_col11 = 1L), null, (_col11 - 1))) (type: double), ((_col12 - ((_col13 * _col13) / _col14)) / _col14) (type: double), (UDFToDouble(_col15) / _col16) (type: double), ((_col12 - ((_col13 * _col13) / _col14)) / if((_col14 = 1L), null, (_col14 - 1))) (type: double), (_col17 / _col18) (type: double), _col19 (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), power(((_col20 - ((_col21 * _col21) / _col22)) / _col22), 0.5) (type: double), _col15 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -3006,12 
+3006,12 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 27, 29, 7, 8, 36, 40, 42, 49, 50, 19, 54, 59, 15] selectExpressions: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 24:double)(children: DoubleColDivideLongColumn(col 23:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 23:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double, DoubleColDivideLongColumn(col 28:double, col 6:bigint)(children: CastLongToDouble(col 5:bigint) -> 28:double) -> 29:double, DoubleColDivideLongColumn(col 32:double, col 35:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 30:double) -> 31:double) -> 32:double, IfExprNullCondExpr(col 33:boolean, null, col 34:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 34:bigint) -> 35:bigint) -> 36:double, DoubleColDivideLongColumn(col 39:double, col 14:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 14:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 37:double) -> 38:double) -> 39:double) -> 40:double, DoubleColDivideLongColumn(col 41:double, col 16:bigint)(children: CastLongToDouble(col 15:bigint) -> 41:double) -> 42:double, DoubleColDivideLongColumn(col 45:double, col 48:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 14:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 43:double) -> 44:double) -> 45:double, IfExprNullCondExpr(col 46:boolean, null, col 47:bigint)(children: 
LongColEqualLongScalar(col 14:bigint, val 1) -> 46:boolean, LongColSubtractLongScalar(col 14:bigint, val 1) -> 47:bigint) -> 48:bigint) -> 49:double, DoubleColDivideLongColumn(col 17:double, col 18:bigint) -> 50:double, DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 52:double)(children: DoubleColDivideLongColumn(col 51:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 51:double) -> 52:double) -> 53:double) -> 54:double, FuncPowerDoubleToDouble(col 58:double)(children: DoubleColDivideLongColumn(col 57:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 20:double, col 56:double)(children: DoubleColDivideLongColumn(col 55:double, col 22:bigint)(children: DoubleColMultiplyDoubleColumn(col 21:double, col 21:double) -> 55:double) -> 56:double) -> 57:double) -> 58:double) -> 59:double - Statistics: Num rows: 5980 Data size: 1196404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2458210 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: +++++++++++++++++++++++++++++++++++++++ keys: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175D) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28D - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28D - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175D)) (type: double), _col6 (type: double), (_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), _col2 (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175D / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- 
_col2)) * UDFToDouble((- _col4)))) (type: double), (_col2 * 10.175D) (type: double), _col10 (type: double), (((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175D) (type: double), (10.175D % (10.175D / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28D - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), _col4 (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28D) (type: double) null sort order: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz - Statistics: Num rows: 5980 Data size: 1196404 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2458210 Basic stats: COMPLETE Column stats: COMPLETE top n: 50 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -3025,7 +3025,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 27, 23, 24, 29, 25, 26, 7, 35, 31, 8, 30, 32, 36, 28, 27, 38, 40, 37, 42, 49, 41, 39, 50, 43, 45, 48, 19, 54, 44, 52, 145, 59, 15, 53, 7, 7, 55] selectExpressions: DoubleColMultiplyDoubleScalar(col 27:double, val 10.175) -> 23:double, DoubleColUnaryMinus(col 27:double) -> 24:double, DoubleColUnaryMinus(col 27:double) -> 25:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 26:double, LongColUnaryMinus(col 7:bigint) -> 35:bigint, DoubleColMultiplyDoubleColumn(col 28:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 27:double) -> 30:double) -> 31:double, DoubleColMultiplyDoubleColumn(col 32:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 
30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 27:double) -> 30:double) -> 32:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 28:double) -> 30:double, DoubleColUnaryMinus(col 28:double)(children: DoubleColMultiplyDoubleScalar(col 27:double, val 10.175) -> 28:double) -> 32:double, DoubleColAddDoubleColumn(col 36:double, col 37:double)(children: DoubleColMultiplyDoubleColumn(col 38:double, col 28:double)(children: DoubleColMultiplyDoubleColumn(col 28:double, col 37:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 28:double, DoubleColUnaryMinus(col 27:double) -> 37:double) -> 38:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 28:double) -> 37:double) -> 28:double, DoubleColDivideDoubleColumn(col 37:double, col 27:double)(children: CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 37:double) -> 38:double, DoubleScalarDivideDoubleColumn(val 10.175, col 29:double) -> 37:double, DoubleColSubtractDoubleColumn(col 39:double, col 43:double)(children: DoubleColAddDoubleColumn(col 36:double, col 41:double)(children: DoubleColMultiplyDoubleColumn(col 43:double, col 39:double)(children: DoubleColMultiplyDoubleColumn(col 39:double, col 41:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 39:double, DoubleColUnaryMinus(col 27:double) -> 41:double) -> 43:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 39:double) -> 41:double) -> 39:double, DoubleColMultiplyDoubleColumn(col 44:double, col 41:double)(children: DoubleColMultiplyDoubleColumn(col 41:double, col 43:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 41:double, DoubleColUnaryMinus(col 27:double) -> 43:double) -> 44:double, CastLongToDouble(col 48:bigint)(children: 
LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 41:double) -> 43:double) -> 41:double, DoubleColMultiplyDoubleScalar(col 27:double, val 10.175) -> 39:double, DoubleColMultiplyDoubleScalar(col 44:double, val 10.175)(children: DoubleColSubtractDoubleColumn(col 43:double, col 45:double)(children: DoubleColAddDoubleColumn(col 36:double, col 44:double)(children: DoubleColMultiplyDoubleColumn(col 45:double, col 43:double)(children: DoubleColMultiplyDoubleColumn(col 43:double, col 44:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 43:double, DoubleColUnaryMinus(col 27:double) -> 44:double) -> 45:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 43:double) -> 44:double) -> 43:double, DoubleColMultiplyDoubleColumn(col 51:double, col 44:double)(children: DoubleColMultiplyDoubleColumn(col 44:double, col 45:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 44:double, DoubleColUnaryMinus(col 27:double) -> 45:double) -> 51:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 44:double) -> 45:double) -> 44:double) -> 43:double, DoubleScalarModuloDoubleColumn(val 10.175, col 44:double)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 29:double) -> 44:double) -> 45:double, LongColUnaryMinus(col 8:tinyint) -> 48:tinyint, DoubleColUnaryMinus(col 52:double)(children: DoubleColMultiplyDoubleColumn(col 44:double, col 51:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 44:double, DoubleColUnaryMinus(col 27:double) -> 51:double) -> 52:double) -> 44:double, DoubleColModuloDoubleColumn(col 51:double, col 50:double)(children: DoubleColUnaryMinus(col 27:double) -> 51:double) -> 52:double, DecimalScalarDivideDecimalColumn(val -26.28, col 127:decimal(3,0))(children: CastLongToDecimal(col 71:tinyint)(children: LongColUnaryMinus(col 8:tinyint) -> 71:tinyint) -> 127:decimal(3,0)) -> 145:decimal(8,6), 
DoubleColDivideDoubleColumn(col 51:double, col 40:double)(children: DoubleColAddDoubleColumn(col 36:double, col 53:double)(children: DoubleColMultiplyDoubleColumn(col 55:double, col 51:double)(children: DoubleColMultiplyDoubleColumn(col 51:double, col 53:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 51:double, DoubleColUnaryMinus(col 27:double) -> 53:double) -> 55:double, CastLongToDouble(col 71:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 71:bigint) -> 51:double) -> 53:double) -> 51:double) -> 53:double, DoubleColModuloDoubleScalar(col 51:double, val -26.28)(children: DoubleColAddDoubleColumn(col 36:double, col 55:double)(children: DoubleColMultiplyDoubleColumn(col 56:double, col 51:double)(children: DoubleColMultiplyDoubleColumn(col 51:double, col 55:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 27:double) -> 51:double, DoubleColUnaryMinus(col 27:double) -> 55:double) -> 56:double, CastLongToDouble(col 71:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 71:bigint) -> 51:double) -> 55:double) -> 51:double) -> 55:double - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5628990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 
(type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) null sort order: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz @@ -3034,7 +3034,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5628990 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3051,7 +3051,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 2, 17, 18, 19, 20, 21, 22, 3, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 8, 8, 38] - Statistics: Num rows: 5980 Data size: 2739514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 5628990 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 Limit Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out b/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out index af0c461861f3..63f97eeca17c 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_stats.q.out @@ -1207,13 +1207,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 
(type: timestamp) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: timestamp) - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1223,10 +1223,10 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 36 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1826,16 +1826,16 @@ STAGE PLANS: Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: val1 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 50 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 95 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 50 Data size: 2800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 95 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1845,10 +1845,10 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 95 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 95 Data size: 5320 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat From 8b361def8bf3956e38737c8e5094c864027d42e8 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Tue, 23 Dec 2025 14:16:26 -0800 Subject: [PATCH 06/14] HIVE-29368: impacted test .out files --- .../llap/iceberg_bucket_map_join_1.q.out | 48 +- .../llap/bucket_map_join_tez3.q.out | 92 +-- .../clientpositive/llap/mapjoin_date.q.out | 6 +- .../llap/materialized_view_rewrite_8.q.out | 2 +- .../llap/materialized_view_rewrite_9.q.out | 6 +- .../llap/parquet_vectorization_11.q.out | 6 +- .../llap/parquet_vectorization_12.q.out | 6 +- .../llap/parquet_vectorization_3.q.out | 6 +- .../llap/scratch_col_reused_by_child.q.out | 2 +- ...ecial_character_in_tabnames_quotes_1.q.out | 2 +- .../clientpositive/llap/subquery_notin.q.out | 46 +- .../llap/tez_dynpart_hashjoin_4.q.out | 118 ++-- .../llap/tez_fixed_bucket_pruning.q.out | 16 +- .../results/clientpositive/llap/tpch18.q.out | 19 +- .../llap/vector_between_in.q.out | 24 +- .../clientpositive/llap/vector_date_1.q.out | 6 +- .../llap/vector_interval_mapjoin.q.out | 6 +- .../llap/vector_outer_join3.q.out | 36 +- .../llap/vector_outer_join4.q.out | 28 +- .../llap/vector_outer_join_constants.q.out | 382 ++++++----- .../llap/vector_partitioned_date_time.q.out | 28 +- .../llap/vector_ptf_bounded_start.q.out | 24 +- .../llap/vector_string_concat.q.out | 12 +- .../llap/vectorization_11.q.out | 6 +- .../llap/vectorization_12.q.out | 6 +- .../clientpositive/llap/vectorization_3.q.out | 8 +- ...ctorized_dynamic_semijoin_reduction2.q.out | 8 +- .../llap/vectorized_timestamp.q.out | 6 +- 
.../perf/tpcds30tb/cte/cbo_query12.q.out | 18 +- .../perf/tpcds30tb/cte/cbo_query16.q.out | 26 +- .../perf/tpcds30tb/cte/cbo_query20.q.out | 18 +- .../perf/tpcds30tb/cte/cbo_query21.q.out | 24 +- .../perf/tpcds30tb/cte/cbo_query23.q.out | 4 +- .../perf/tpcds30tb/cte/cbo_query32.q.out | 18 +- .../perf/tpcds30tb/cte/cbo_query37.q.out | 12 +- .../perf/tpcds30tb/cte/cbo_query40.q.out | 18 +- .../perf/tpcds30tb/cte/cbo_query5.q.out | 32 +- .../perf/tpcds30tb/cte/cbo_query58.q.out | 51 +- .../perf/tpcds30tb/cte/cbo_query80.q.out | 72 +- .../perf/tpcds30tb/cte/cbo_query82.q.out | 12 +- .../perf/tpcds30tb/cte/cbo_query92.q.out | 40 +- .../perf/tpcds30tb/cte/cbo_query94.q.out | 26 +- .../perf/tpcds30tb/cte/cbo_query95.q.out | 26 +- .../perf/tpcds30tb/cte/cbo_query98.q.out | 18 +- .../perf/tpcds30tb/tez/cbo_query12.q.out | 18 +- .../perf/tpcds30tb/tez/cbo_query16.q.out | 26 +- .../perf/tpcds30tb/tez/cbo_query20.q.out | 18 +- .../perf/tpcds30tb/tez/cbo_query21.q.out | 24 +- .../perf/tpcds30tb/tez/cbo_query32.q.out | 18 +- .../perf/tpcds30tb/tez/cbo_query37.q.out | 12 +- .../perf/tpcds30tb/tez/cbo_query40.q.out | 18 +- .../perf/tpcds30tb/tez/cbo_query5.q.out | 48 +- .../perf/tpcds30tb/tez/cbo_query58.q.out | 39 +- .../perf/tpcds30tb/tez/cbo_query80.q.out | 72 +- .../perf/tpcds30tb/tez/cbo_query82.q.out | 12 +- .../perf/tpcds30tb/tez/cbo_query92.q.out | 40 +- .../perf/tpcds30tb/tez/cbo_query94.q.out | 26 +- .../perf/tpcds30tb/tez/cbo_query95.q.out | 26 +- .../perf/tpcds30tb/tez/cbo_query98.q.out | 18 +- .../perf/tpcds30tb/tez/query12.q.out | 60 +- .../perf/tpcds30tb/tez/query16.q.out | 64 +- .../perf/tpcds30tb/tez/query20.q.out | 60 +- .../perf/tpcds30tb/tez/query21.q.out | 78 +-- .../perf/tpcds30tb/tez/query23.q.out | 22 +- .../perf/tpcds30tb/tez/query32.q.out | 86 ++- .../perf/tpcds30tb/tez/query38.q.out | 46 +- .../perf/tpcds30tb/tez/query40.q.out | 120 ++-- .../perf/tpcds30tb/tez/query5.q.out | 214 +++--- .../perf/tpcds30tb/tez/query51.q.out | 42 +- 
.../perf/tpcds30tb/tez/query58.q.out | 639 +++++++++++------- .../perf/tpcds30tb/tez/query80.q.out | 436 ++++++------ .../perf/tpcds30tb/tez/query83.q.out | 399 ++++++----- .../perf/tpcds30tb/tez/query87.q.out | 78 +-- .../perf/tpcds30tb/tez/query92.q.out | 252 +++---- .../perf/tpcds30tb/tez/query94.q.out | 64 +- .../perf/tpcds30tb/tez/query95.q.out | 64 +- .../perf/tpcds30tb/tez/query98.q.out | 60 +- 77 files changed, 2457 insertions(+), 2112 deletions(-) diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out index aedc7fc43de6..f8dfb22e5fa1 100644 --- a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out +++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_bucket_map_join_1.q.out @@ -111,9 +111,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=30 width=520) + Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=30 width=336) + Map Join Operator [MAPJOIN_45] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] llap MULTICAST [RS_7] @@ -175,19 +175,19 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=10 width=520) + Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=10 width=336) + Map Join Operator [MAPJOIN_49] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] llap MULTICAST [RS_11] PartitionCols:_col1 - Group By Operator [GBY_8] (rows=1 width=168) + Group By Operator [GBY_8] (rows=3 width=168) 
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=1 width=168) + Group By Operator [GBY_6] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_5] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -245,9 +245,9 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=30 width=520) + Select Operator [SEL_9] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_45] (rows=30 width=336) + Map Join Operator [MAPJOIN_45] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_7._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] llap BROADCAST [RS_7] @@ -309,19 +309,19 @@ Stage-0 Stage-1 Map 1 llap File Output Operator [FS_14] - Select Operator [SEL_13] (rows=10 width=520) + Select Operator [SEL_13] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_49] (rows=10 width=336) + Map Join Operator [MAPJOIN_49] (rows=3 width=336) Conds:SEL_2._col0, _col1=RS_11._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] llap BROADCAST [RS_11] PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=1 width=168) + Group By Operator [GBY_8] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] llap SHUFFLE [RS_7] PartitionCols:_col0, _col1 - Group By Operator [GBY_6] (rows=1 width=168) + Group By Operator [GBY_6] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_5] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -379,9 +379,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=30 width=520) + Select Operator [SEL_53] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join 
Operator [MAPJOIN_52] (rows=30 width=336) + Map Join Operator [MAPJOIN_52] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_49] @@ -443,19 +443,19 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=10 width=520) + Select Operator [SEL_60] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_59] (rows=10 width=336) + Map Join Operator [MAPJOIN_59] (rows=3 width=336) BucketMapJoin:true,Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_EDGE] vectorized, llap MULTICAST [RS_56] PartitionCols:_col1 - Group By Operator [GBY_55] (rows=1 width=168) + Group By Operator [GBY_55] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_54] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=1 width=168) + Group By Operator [GBY_53] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_52] (rows=3 width=168) Output:["date_col","decimal_col"] @@ -513,9 +513,9 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_54] - Select Operator [SEL_53] (rows=30 width=520) + Select Operator [SEL_53] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map Join Operator [MAPJOIN_52] (rows=30 width=336) + Map Join Operator [MAPJOIN_52] (rows=3 width=336) Conds:SEL_51._col0, _col1=RS_49._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Map 2 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_49] @@ -577,19 +577,19 @@ Stage-0 Stage-1 Map 1 vectorized, llap File Output Operator [FS_61] - Select Operator [SEL_60] (rows=10 width=520) + Select Operator [SEL_60] (rows=3 width=520) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Map 
Join Operator [MAPJOIN_59] (rows=10 width=336) + Map Join Operator [MAPJOIN_59] (rows=3 width=336) Conds:SEL_58._col0, _col1=RS_56._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3"] <-Reducer 3 [BROADCAST_EDGE] vectorized, llap BROADCAST [RS_56] PartitionCols:_col0, _col1 - Group By Operator [GBY_55] (rows=1 width=168) + Group By Operator [GBY_55] (rows=3 width=168) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 <-Map 2 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_54] PartitionCols:_col0, _col1 - Group By Operator [GBY_53] (rows=1 width=168) + Group By Operator [GBY_53] (rows=3 width=168) Output:["_col0","_col1"],keys:date_col, decimal_col Select Operator [SEL_52] (rows=3 width=168) Output:["date_col","decimal_col"] diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out index 270f527e890f..56b18f1c9766 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez3.q.out @@ -151,19 +151,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -380,19 +380,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -467,10 +467,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -478,7 +478,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), 
_col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Execution mode: llap @@ -533,7 +533,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -541,7 +541,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: false @@ -628,18 +628,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A 
masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -848,7 +848,7 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Reducer 3 => 1 + Estimated key counts: Reducer 3 => 3 keys: 0 _col0 (type: date), _col1 (type: decimal(38,0)) 1 _col0 (type: date), _col1 (type: decimal(38,0)) @@ -856,18 +856,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -942,10 +942,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 
(type: date), _col1 (type: decimal(38,0)) @@ -953,7 +953,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: llap @@ -1008,7 +1008,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1016,7 +1016,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true @@ -1103,19 +1103,19 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### 
NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1332,19 +1332,19 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE BucketMapJoin: true Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1419,10 +1419,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: 
decimal(38,0)) @@ -1430,7 +1430,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: false Execution mode: vectorized, llap @@ -1485,7 +1485,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1493,7 +1493,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: false @@ -1580,18 +1580,18 @@ STAGE PLANS: input vertices: 1 Map 2 Position of Big Table: 0 - Statistics: Num rows: 30 Data size: 10080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 30 Data size: 15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 30 Data size: 
15600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1800,7 +1800,7 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Reducer 3 => 1 + Estimated key counts: Reducer 3 => 3 keys: 0 _col0 (type: date), _col1 (type: decimal(38,0)) 1 _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1808,18 +1808,18 @@ STAGE PLANS: input vertices: 1 Reducer 3 Position of Big Table: 0 - Statistics: Num rows: 10 Data size: 3360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: date), 'pipeline' (type: string), _col1 (type: decimal(38,0)), _col2 (type: date), 'pipeline' (type: string), _col3 (type: decimal(38,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 5200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1560 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1894,10 +1894,10 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: date_col (type: date), decimal_col (type: decimal(38,0)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1905,7 +1905,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Execution mode: vectorized, llap @@ -1960,7 +1960,7 @@ STAGE PLANS: keys: KEY._col0 (type: date), KEY._col1 (type: decimal(38,0)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: date), _col1 (type: decimal(38,0)) @@ -1968,7 +1968,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: date), _col1 (type: decimal(38,0)) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out index c5dfc75a5f30..c426f13591b6 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_date.q.out @@ -45,7 +45,7 @@ STAGE PLANS: TableScan alias: p1 filterExpr: birthdate is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:birthdate, smallTablePos:1, keyRatio:1.0 Statistics: Num 
rows: 2 Data size: 296 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -85,13 +85,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 1184 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 592 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_8.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_8.q.out index caec28516823..b8ec571b2e0d 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_8.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_8.q.out @@ -335,7 +335,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: my_date (type: date), my_id2 (type: bigint), environment (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_9.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_9.q.out index becfffca3e67..b7d946404c30 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_9.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_9.q.out @@ -112,7 +112,7 @@ STAGE PLANS: Group By Operator 
aggregations: sum(_col0) keys: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE @@ -232,7 +232,7 @@ STAGE PLANS: Group By Operator aggregations: sum(_col0) keys: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE @@ -349,7 +349,7 @@ STAGE PLANS: Group By Operator aggregations: sum(_col4) keys: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: string), _col3 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 151 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out index 23c7304f52e2..96e5976e66de 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_11.q.out @@ -49,7 +49,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) + filterExpr: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2256914 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -57,8 +57,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: 
FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)), FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string)) - predicate: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string), FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 7701 Data size: 1414500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out index 1ba06844f9dd..02fde23144f2 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_12.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) + filterExpr: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean) Statistics: Num rows: 12288 Data size: 1522994 Basic stats: COMPLETE Column 
stats: COMPLETE TableScan Vectorization: native: true @@ -95,8 +95,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), SelectColumnIsNull(col 8:timestamp)) - predicate: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint))) + predicate: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean) Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) 
diff --git a/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out index d75a945ac003..25e7197cedb8 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_vectorization_3.q.out @@ -73,7 +73,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesparquet - filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) + filterExpr: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 12288 Data size: 1027540 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -81,8 +81,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 14:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 14:float), FilterDecimalColNotEqualDecimalScalar(col 15:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 15:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 16:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 16:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 17:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 17:double), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 18:decimal(8,3)/DECIMAL_64, val 79553)(children: CastLongToDecimal64(col 1:smallint) -> 18:decimal(8,3)/DECIMAL_64), 
FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) - predicate: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 15:decimal(8,3)/DECIMAL_64, val 79553)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(8,3)/DECIMAL_64), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float), FilterDecimalColNotEqualDecimalScalar(col 17:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 17:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 18:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 18:double))) + predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) 
* UDFToDouble(cint)) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/scratch_col_reused_by_child.q.out b/ql/src/test/results/clientpositive/llap/scratch_col_reused_by_child.q.out index a769d77d2c05..a6965593f68c 100644 --- a/ql/src/test/results/clientpositive/llap/scratch_col_reused_by_child.q.out +++ b/ql/src/test/results/clientpositive/llap/scratch_col_reused_by_child.q.out @@ -187,7 +187,7 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out index e5c937ec4dd5..a2d955298324 100644 --- a/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out +++ b/ql/src/test/results/clientpositive/llap/special_character_in_tabnames_quotes_1.q.out @@ -3837,7 +3837,7 @@ STAGE PLANS: Group By Operator aggregations: max(p_retailprice), min(p_retailprice) keys: null (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index 1de46e04de66..6801074f4e1e 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -1536,16 +1536,16 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 83 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: 
COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 83 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs - Reducer 2 + Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1555,42 +1555,42 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 230 Data size: 20270 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 230 Data size: 20270 Basic stats: COMPLETE Column stats: COMPLETE + null sort order: + sort order: + Statistics: Num rows: 167 Data size: 14537 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: boolean) - Reducer 3 + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 + 0 + 1 outputColumnNames: _col0, _col2, _col3, _col4 - Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: bigint), _col4 (type: bigint), _col2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0L) or (_col4 is null and (_col2 >= _col1) and _col0 is not null)) (type: boolean) - Statistics: Num rows: 230 Data size: 23950 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 167 Data size: 17209 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 230 Data size: 20010 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 230 Data size: 20010 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 167 Data size: 14529 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 + Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -1599,28 +1599,28 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - null sort order: - sort order: + null sort order: + sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 5 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 41 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 41 Data size: 251 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition 
columns: _col0 (type: string) - Statistics: Num rows: 41 Data size: 251 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 @@ -1630,7 +1630,7 @@ STAGE PLANS: ListSink Warning: Shuffle Join MERGEJOIN[34][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product -PREHOOK: query: select * +PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_4.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_4.q.out index 75cb43899a15..0f09eb27649e 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_4.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_4.q.out @@ -70,7 +70,7 @@ POSTHOOK: query: ALTER TABLE table_b UPDATE STATISTICS FOR COLUMN product_sk SET POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS POSTHOOK: Input: default@table_b POSTHOOK: Output: default@table_b -Warning: Map Join MAPJOIN[31][bigTable=?] in task 'Map 1' is a cross product +Warning: Map Join MAPJOIN[31][bigTable=?] 
in task 'Map 2' is a cross product PREHOOK: query: EXPLAIN SELECT TC.CONST_DATE, TB.PRODUCT_SK FROM TABLE_A TA @@ -104,8 +104,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) - Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -116,31 +115,68 @@ STAGE PLANS: Statistics: Num rows: 100000000 Data size: 15400000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) - Statistics: Num rows: 50000000 Data size: 7700000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200000 Data size: 30800000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: product_id (type: int), product_sk (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 50000000 Data size: 4900000000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 200000 Data size: 19600000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 200000 Data size: 19600000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 2 + Map Operator Tree: + TableScan + alias: ta + filterExpr: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:product_id, smallTablePos:0, keyRatio:4.0E-4 + Statistics: Num rows: 200000000 Data size: 12000000000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) + Statistics: Num rows: 80000 
Data size: 4800000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: product_id (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 80000 Data size: 320000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 input vertices: - 1 Map 2 - Statistics: Num rows: 50000000 Data size: 4900000000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50000000 Data size: 4900000000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + 0 Map 1 + Statistics: Num rows: 80000 Data size: 7520000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 80000 Data size: 7520000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: DATE'2023-11-27' (type: date), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 80000 Data size: 12000000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 80000 Data size: 12000000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: _dummy_table @@ -154,52 +190,6 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs 
- Map 3 - Map Operator Tree: - TableScan - alias: ta - filterExpr: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) - Statistics: Num rows: 200000000 Data size: 12000000000 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((start_date = DATE'2023-11-27') and product_id is not null) (type: boolean) - Statistics: Num rows: 100000000 Data size: 6000000000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: product_id (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 100000000 Data size: 400000000 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: all inputs - Reducer 4 - Execution mode: vectorized, llap - Reduce Operator Tree: - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: int) - 1 KEY.reducesinkkey0 (type: int) - outputColumnNames: _col1 - input vertices: - 0 Map 1 - Statistics: Num rows: 16666666666 Data size: 1566666666604 Basic stats: COMPLETE Column stats: COMPLETE - DynamicPartitionHashJoin: true - Select Operator - expressions: DATE'2023-11-27' (type: date), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16666666666 Data size: 2499999999900 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 16666666666 Data size: 2499999999900 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git 
a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out index 673b962e0e25..d45764c3fbec 100644 --- a/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_fixed_bucket_pruning.q.out @@ -520,7 +520,7 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Estimated key counts: Map 4 => 90170 + Estimated key counts: Map 4 => 180340 keys: 0 _col1 (type: bigint), _col0 (type: bigint) 1 _col0 (type: bigint), _col2 (type: bigint) @@ -671,11 +671,11 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) - Statistics: Num rows: 90170 Data size: 7213600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 14427200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l3_snapshot_number (type: bigint), plan_key (type: bigint), finplan_detail_object_id (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 4328160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: bigint), _col2 (type: bigint) @@ -683,7 +683,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col2 (type: bigint) - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 4328160 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: bigint) auto parallelism: true @@ -978,7 +978,7 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Estimated key counts: Map 4 => 90170 + 
Estimated key counts: Map 4 => 180340 keys: 0 _col1 (type: bigint), _col0 (type: bigint) 1 _col0 (type: bigint), _col2 (type: bigint) @@ -1130,11 +1130,11 @@ STAGE PLANS: Filter Operator isSamplingPred: false predicate: ((idp_data_date = DATE'2017-12-28') and finplan_detail_object_id is not null and l3_snapshot_number is not null) (type: boolean) - Statistics: Num rows: 90170 Data size: 7213600 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 14427200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: l3_snapshot_number (type: bigint), plan_key (type: bigint), finplan_detail_object_id (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 4328160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: bigint), _col2 (type: bigint) @@ -1142,7 +1142,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col2 (type: bigint) - Statistics: Num rows: 90170 Data size: 2164080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 180340 Data size: 4328160 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: bigint) auto parallelism: true diff --git a/ql/src/test/results/clientpositive/llap/tpch18.q.out b/ql/src/test/results/clientpositive/llap/tpch18.q.out index 6dce589e7ebd..de0c9991b3db 100644 --- a/ql/src/test/results/clientpositive/llap/tpch18.q.out +++ b/ql/src/test/results/clientpositive/llap/tpch18.q.out @@ -116,16 +116,15 @@ HiveSortLimit(sort0=[$4], sort1=[$3], dir0=[DESC], dir1=[ASC], fetch=[100]) HiveProject(o_orderkey=[$0], o_totalprice=[$2], o_orderdate=[$3], c_custkey=[$5], c_name=[$6], $f8=[*($4, $7)]) HiveJoin(condition=[=($5, $1)], joinType=[inner], algorithm=[none], cost=[not available]) 
HiveProject(o_orderkey=[$0], o_custkey=[$1], o_totalprice=[$2], o_orderdate=[$3], count=[$4]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(o_orderkey=[$0], o_custkey=[$1], o_totalprice=[$2], o_orderdate=[$3], count=[$4]) - HiveAggregate(group=[{0, 1, 2, 3}], count=[count()]) - HiveProject(o_orderkey=[$0], o_custkey=[$1], o_totalprice=[$3], o_orderdate=[$4]) - HiveTableScan(table=[[tpch_0_001, orders]], table:alias=[orders]) - HiveProject($f0=[$0]) - HiveFilter(condition=[>($1, 3E2)]) - HiveAggregate(group=[{0}], agg#0=[sum($4)]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[tpch_0_001, lineitem]], table:alias=[lineitem]) + HiveAggregate(group=[{0, 1, 2, 3}], count=[count()]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(o_orderkey=[$0], o_custkey=[$1], o_totalprice=[$3], o_orderdate=[$4]) + HiveTableScan(table=[[tpch_0_001, orders]], table:alias=[orders]) + HiveProject($f0=[$0]) + HiveFilter(condition=[>($1, 3E2)]) + HiveAggregate(group=[{0}], agg#0=[sum($4)]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[tpch_0_001, lineitem]], table:alias=[lineitem]) HiveProject(c_custkey=[$0], c_name=[$1], count=[$2]) HiveAggregate(group=[{0, 1}], count=[count()]) HiveProject(c_custkey=[$0], c_name=[$1]) diff --git a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out index ca80131cfdd1..23e8a82b7a2e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -62,7 +62,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColumnInList(col 3:date, values [-171, -67]) predicate: (cdate) IN (DATE'1969-07-14', DATE'1969-10-26') (type: boolean) - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdate (type: date) outputColumnNames: _col0 @@ -70,7 +70,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3] - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z @@ -79,7 +79,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -107,13 +107,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6145 Data size: 169680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 28 Data size: 840 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1174,10 +1174,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: _col0 (type: boolean) - minReductionHashAggr: 0.99 + minReductionHashAggr: 
0.92749614 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -1187,7 +1187,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1222,7 +1222,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -1231,7 +1231,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1249,13 +1249,13 @@ STAGE PLANS: className: VectorSelectOperator native: true 
projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3072 Data size: 36864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 891 Data size: 10692 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_date_1.q.out b/ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 334d56eecd80..3e62266aa51e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -978,7 +978,7 @@ STAGE PLANS: native: true predicateExpression: FilterDateColEqualDateScalar(col 0:date, val 11323) predicate: (dt1 = DATE'2001-01-01') (type: boolean) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: DATE'2001-01-01' (type: date), dt2 (type: date) outputColumnNames: _col0, _col1 @@ -987,13 +987,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 1] selectExpressions: ConstantVectorExpression(val 11323) -> 4:date - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column 
stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 90351996b6e1..2bbf69ab5033 100644 --- a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -240,7 +240,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 29831 Data size: 5966200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 @@ -248,13 +248,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 8, 17] - Statistics: Num rows: 29831 Data size: 5966200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 29831 Data size: 5966200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out index b4a15de44345..db657028a293 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out +++ 
b/ql/src/test/results/clientpositive/llap/vector_outer_join3.q.out @@ -415,13 +415,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -618,13 +618,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -863,13 +863,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -1784,13 +1784,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -1987,13 +1987,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -2232,13 +2232,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -3153,13 +3153,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -3366,13 +3366,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, 
"minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, @@ -3664,13 +3664,13 @@ POSTHOOK: Input: default@small_alltypesorc_a_n1 }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 10, "minValue": -28810, "maxValue": -28789 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 6, "minValue": -28812, "maxValue": -28786 }, diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out index e3fe66a06b3d..d7abcecbd954 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join4.q.out @@ -429,13 +429,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -682,13 +682,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -1690,13 +1690,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -1888,13 +1888,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -2901,13 +2901,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ 
-3104,13 +3104,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, @@ -3349,13 +3349,13 @@ POSTHOOK: Input: default@small_alltypesorc_b }, { "name": "ctimestamp1", - "ndv": 0, + "ndv": 14, "minValue": -28813, "maxValue": -28788 }, { "name": "ctimestamp2", - "ndv": 0, + "ndv": 17, "minValue": -28816, "maxValue": -28785 }, diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out index dd94e53a68bd..9ea03dbdcca3 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join_constants.q.out @@ -184,7 +184,7 @@ POSTHOOK: type: ANALYZE_TABLE POSTHOOK: Input: default@lday POSTHOOK: Output: default@lday #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: EXPLAIN VECTORIZATION DETAIL select * from (select item1.S_ID S_ID, @@ -272,48 +272,112 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) - Map 6 <- Map 7 (BROADCAST_EDGE) + Map 3 <- Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) + Map 7 <- Map 1 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: 
Map 1 Map Operator Tree: TableScan - alias: od1 - filterExpr: (o_date is not null and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + alias: item1 + filterExpr: ((s_id = 22) and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:id:int, 1:o_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:id:int, 1:s_id:int, 2:name:string, 3:ROW__ID:struct, 4:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:int)) - predicate: (o_date is not null and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 1:int, val 22), SelectColumnIsNotNull(col 0:int)) + predicate: ((s_id = 22) and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int), o_date (type: timestamp) + expressions: id (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1] + dataColumns: id:int, s_id:int, name:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: lday2 + filterExpr: (ly_date is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:d_date:timestamp, 1:ly_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:timestamp)) + predicate: (ly_date is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 
2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: timestamp), ly_date (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: timestamp) + 1 _col0 (type: timestamp) Map Join Vectorization: - bigTableKeyColumns: 0:int + bigTableKeyColumns: 0:timestamp bigTableRetainColumnNums: [1] bigTableValueColumns: 1:timestamp - className: VectorMapJoinInnerBigOnlyLongOperator + className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -321,14 +385,14 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: - 1 Map 5 + 1 Map 6 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: timestamp) - 1 _col0 (type: timestamp) + 1 _col1 (type: timestamp) Map Join Vectorization: bigTableKeyColumns: 1:timestamp bigTableRetainColumnNums: [] @@ -364,47 +428,6 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE 
Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: timestamp) - 1 _col1 (type: timestamp) - Map Join Vectorization: - bigTableKeyColumns: 1:timestamp - bigTableRetainColumnNums: [] - className: VectorMapJoinInnerBigOnlyMultiKeyOperator - native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nonOuterSmallTableKeyMapping: [] - hashTableImplementationType: OPTIMIZED - input vertices: - 0 Map 6 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: ConstantVectorExpression(val 1) -> 5:boolean - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: true (type: boolean) - minReductionHashAggr: 0.75 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: boolean) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: boolean) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:boolean - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -419,44 +442,55 @@ STAGE PLANS: rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: 
id:int, o_date:timestamp + dataColumns: d_date:timestamp, ly_date:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [bigint, bigint] - Map 5 + scratchColumnTypeNames: [bigint] + Map 6 Map Operator Tree: TableScan - alias: item1 - filterExpr: ((s_id = 22) and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + alias: ytday2 + filterExpr: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:id:int, 1:s_id:int, 2:name:string, 3:ROW__ID:struct, 4:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:d_date:timestamp, 1:ytd_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 1:int, val 22), SelectColumnIsNotNull(col 0:int)) - predicate: ((s_id = 22) and id is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: FilterTimestampColEqualTimestampScalar(col 0:timestamp, val 2008-04-30 00:00:00), SelectColumnIsNotNull(col 1:timestamp)) + predicate: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: id (type: int) + expressions: ytd_date (type: timestamp) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [1] + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE 
Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: timestamp) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col0 (type: timestamp) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int + className: VectorReduceSinkMultiKeyOperator + keyColumns: 1:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 1:timestamp + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -469,46 +503,46 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 3 + dataColumnCount: 2 includeColumns: [0, 1] - dataColumns: id:int, s_id:int, name:string + dataColumns: d_date:timestamp, ytd_date:timestamp partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 6 + Map 7 Map Operator Tree: TableScan - alias: lday2 - filterExpr: (ly_date is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 
Basic stats: COMPLETE Column stats: COMPLETE + alias: od2 + filterExpr: (o_date is not null and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true - vectorizationSchemaColumns: [0:d_date:timestamp, 1:ly_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] + vectorizationSchemaColumns: [0:id:int, 1:o_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] Filter Operator Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:timestamp)) - predicate: (ly_date is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 1:timestamp), SelectColumnIsNotNull(col 0:int)) + predicate: (o_date is not null and id is not null) (type: boolean) + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date (type: timestamp), ly_date (type: timestamp) + expressions: id (type: int), o_date (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: timestamp) - 1 _col0 (type: timestamp) + 0 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumns: 0:timestamp + bigTableKeyColumns: 0:int bigTableRetainColumnNums: [1] bigTableValueColumns: 1:timestamp - className: VectorMapJoinInnerBigOnlyMultiKeyOperator + className: VectorMapJoinInnerBigOnlyLongOperator native: true 
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -516,7 +550,7 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: - 1 Map 7 + 1 Map 1 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: timestamp) @@ -529,6 +563,67 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: timestamp) + 1 _col0 (type: timestamp) + Map Join Vectorization: + bigTableKeyColumns: 1:timestamp + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinInnerBigOnlyMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int 
+ bigTableRetainColumnNums: [] + className: VectorMapJoinInnerBigOnlyLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED + input vertices: + 1 Reducer 2 + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: ConstantVectorExpression(val 1) -> 4:boolean + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: true (type: boolean) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:boolean + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -537,79 +632,50 @@ STAGE PLANS: inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0, 1] - 
dataColumns: d_date:timestamp, ly_date:timestamp + dataColumns: id:int, o_date:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 7 - Map Operator Tree: - TableScan - alias: ytday2 - filterExpr: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) - Statistics: Num rows: 2 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:d_date:timestamp, 1:ytd_date:timestamp, 2:ROW__ID:struct, 3:ROW__IS__DELETED:boolean] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampColEqualTimestampScalar(col 0:timestamp, val 2008-04-30 00:00:00), SelectColumnIsNotNull(col 1:timestamp)) - predicate: ((d_date = TIMESTAMP'2008-04-30 00:00:00') and ytd_date is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ytd_date (type: timestamp) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: timestamp) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 1:timestamp - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: timestamp) - null sort order: z - sort order: + - Map-reduce 
partition columns: _col0 (type: timestamp) - Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumns: 1:timestamp - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + scratchColumnTypeNames: [bigint] + Reducer 2 Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: + Reduce Vectorization: enabled: true - enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 - includeColumns: [0, 1] - dataColumns: d_date:timestamp, ytd_date:timestamp + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -656,7 +722,7 @@ STAGE PLANS: valueColumns: 1:int, 2:timestamp Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: timestamp) - Reducer 3 + Reducer 5 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -680,7 +746,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 4 + Reducer 8 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -734,7 +800,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[79][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 5' is a cross product PREHOOK: query: select * from (select item1.S_ID S_ID, ytday1.D_DATE D_DATE diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index ff87190d0caf..80c87d730585 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -485,10 +485,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.91240877 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 4352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z @@ -498,7 +498,7 @@ STAGE PLANS: 
className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 68 Data size: 4352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -533,13 +533,13 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 34 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 34 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3154,10 +3154,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0] keys: fl_date (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.91240877 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 4352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z @@ -3167,7 +3167,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 68 Data size: 4352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) @@ -3202,13 +3202,13 @@ STAGE PLANS: keys: KEY._col0 (type: date) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 34 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 34 Data size: 2176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4170,7 +4170,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_date - Statistics: Num rows: 137 Data size: 12198 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 12195 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -4180,7 +4180,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - Statistics: Num rows: 137 Data size: 12198 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 12195 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -5232,7 +5232,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_parquet_partitioned_timestamp - Statistics: Num rows: 137 Data size: 9911 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 137 Data size: 9908 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -5242,7 +5242,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5] - Statistics: Num rows: 137 Data size: 9911 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 137 Data size: 9908 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vector_ptf_bounded_start.q.out b/ql/src/test/results/clientpositive/llap/vector_ptf_bounded_start.q.out index 87a54a46fe3c..58ab800f08f3 100644 --- a/ql/src/test/results/clientpositive/llap/vector_ptf_bounded_start.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_ptf_bounded_start.q.out @@ -3437,7 +3437,7 @@ STAGE PLANS: native: true predicateExpression: FilterTimestampColEqualTimestampScalar(col 3:timestamp, val 1970-01-03 00:00:00) predicate: (p_timestamp = TIMESTAMP'1970-01-03 00:00:00') (type: boolean) - Statistics: Num rows: 20 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: TIMESTAMP'1970-01-03 00:00:00' (type: timestamp) null sort order: a @@ -3450,7 +3450,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:string, 1:string, 2:date, 5:double, 10:int - Statistics: Num rows: 20 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_name (type: string), p_date (type: date), p_retailprice 
(type: double), rowindex (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3492,7 +3492,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 4, 5] - Statistics: Num rows: 20 Data size: 5584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2248 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3532,7 +3532,7 @@ STAGE PLANS: outputTypes: [bigint, double, string, string, date, double, int] partitionExpressions: [ConstantVectorExpression(val 1970-01-03 00:00:00) -> 8:timestamp] streamingColumns: [] - Statistics: Num rows: 20 Data size: 5584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), TIMESTAMP'1970-01-03 00:00:00' (type: timestamp), _col10 (type: int), _col2 (type: date), _col5 (type: double), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -3541,13 +3541,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2, 10, 5, 3, 4, 6, 7] selectExpressions: ConstantVectorExpression(val 1970-01-03 00:00:00) -> 10:timestamp - Statistics: Num rows: 20 Data size: 6464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 6464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2576 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4162,7 +4162,7 @@ STAGE PLANS: native: true 
predicateExpression: FilterDateColEqualDateScalar(col 2:date, val 2) predicate: (p_date = DATE'1970-01-03') (type: boolean) - Statistics: Num rows: 20 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: DATE'1970-01-03' (type: date) null sort order: a @@ -4175,7 +4175,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:string, 1:string, 3:timestamp, 5:double, 10:int - Statistics: Num rows: 20 Data size: 6264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_name (type: string), p_timestamp (type: timestamp), p_retailprice (type: double), rowindex (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -4217,7 +4217,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 4, 5] - Statistics: Num rows: 20 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4257,7 +4257,7 @@ STAGE PLANS: outputTypes: [bigint, double, string, string, timestamp, double, int] partitionExpressions: [ConstantVectorExpression(val 2) -> 8:date] streamingColumns: [] - Statistics: Num rows: 20 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: timestamp), _col10 (type: int), DATE'1970-01-03' (type: date), _col5 (type: 
double), count_window_0 (type: bigint), sum_window_1 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -4266,13 +4266,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 2, 3, 5, 10, 4, 6, 7] selectExpressions: ConstantVectorExpression(val 2) -> 10:date - Statistics: Num rows: 20 Data size: 6576 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 6576 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8 Data size: 2624 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index 8db9490d5ae4..f0a2ef011458 100644 --- a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -383,10 +383,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [] keys: _col0 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z @@ -396,7 +396,7 @@ STAGE PLANS: className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -428,7 +428,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z @@ -437,7 +437,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -454,7 +454,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1853 Data size: 340952 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 Limit Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorization_11.q.out b/ql/src/test/results/clientpositive/llap/vectorization_11.q.out index 6b9bafb538ab..42634ee640b0 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_11.q.out @@ -49,7 +49,7 @@ STAGE PLANS: Map Operator Tree: TableScan 
alias: alltypesorc - filterExpr: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) + filterExpr: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 12288 Data size: 2256914 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -58,8 +58,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)), FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string)) - predicate: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string), FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a))) + predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) Statistics: Num rows: 7701 Data size: 1414500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out index dc3212593078..f68d74709d90 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((cstring1 
like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) + filterExpr: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean) Statistics: Num rows: 12288 Data size: 1522994 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -96,8 +96,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), SelectColumnIsNull(col 8:timestamp)) - predicate: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint))) + predicate: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint 
>= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean) Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) diff --git a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out index 445a06ac0f35..48cf350e1e31 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out @@ -73,7 +73,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) + filterExpr: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 12288 Data size: 1027540 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -82,8 +82,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 14:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 14:float), FilterDecimalColNotEqualDecimalScalar(col 15:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 15:decimal(22,3)), 
FilterDoubleColEqualDoubleScalar(col 16:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 16:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 17:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 17:double), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 18:decimal(8,3)/DECIMAL_64, val 79553)(children: CastLongToDecimal64(col 1:smallint) -> 18:decimal(8,3)/DECIMAL_64), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) - predicate: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterDecimal64ColGreaterEqualDecimal64Scalar(col 15:decimal(8,3)/DECIMAL_64, val 79553)(children: CastLongToDecimal64(col 1:smallint) -> 15:decimal(8,3)/DECIMAL_64), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 16:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 16:float), FilterDecimalColNotEqualDecimalScalar(col 17:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 17:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 18:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 18:double))) + predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column 
stats: COMPLETE Select Operator expressions: cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) @@ -133,7 +133,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(22,3), double, double, decimal(8,3)/DECIMAL_64, double, double, double, double, double, double, double, double, double, double, double, double, double] + scratchColumnTypeNames: [double, decimal(8,3)/DECIMAL_64, double, decimal(22,3), double, double, double, double, double, double, double, double, double, double, double, double, double, double] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out index 262c0184faf0..5bbdded348a2 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out @@ -761,10 +761,10 @@ STAGE PLANS: keys: 0 _col0 (type: date) 1 _col0 (type: date) - Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + 
minReductionHashAggr: 0.95238096 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -910,10 +910,10 @@ STAGE PLANS: keys: 0 _col0 (type: timestamp) 1 _col0 (type: timestamp) - Statistics: Num rows: 2000 Data size: 16000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.95238096 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 8f722cdd70b1..3e1d638d5947 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -290,7 +290,7 @@ STAGE PLANS: native: true predicateExpression: FilterTimestampColumnInList(col 0:timestamp, values [0001-01-02 16:00:00.0, 0002-02-03 16:00:00.0]) predicate: (ts) IN (TIMESTAMP'0001-01-01 00:00:00', TIMESTAMP'0002-02-02 00:00:00') (type: boolean) - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) outputColumnNames: _col0 @@ -298,13 +298,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data 
size: 80 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query12.q.out index ac4f8ced88a8..813daa134207 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query12.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', 
_UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query16.q.out index a6346d4138e8..4e9966445da8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query16.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) HiveAntiJoin(condition=[=($4, $14)], joinType=[anti]) HiveSemiJoin(condition=[AND(=($4, $14), <>($3, $13))], joinType=[semi]) - HiveProject(cs_ship_date_sk=[$0], cs_ship_addr_sk=[$1], cs_call_center_sk=[$2], cs_warehouse_sk=[$3], cs_order_number=[$4], cs_ext_ship_cost=[$5], cs_net_profit=[$6], d_date_sk=[$11], d_date=[$12], ca_address_sk=[$7], ca_state=[$8], cc_call_center_sk=[$9], cc_county=[$10]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], 
joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'NY')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_ship_date_sk=[$1], cs_ship_addr_sk=[$9], cs_call_center_sk=[$10], cs_warehouse_sk=[$13], cs_order_number=[$16], cs_ext_ship_cost=[$27], cs_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($1), IS NOT NULL($10))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'NY')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) - HiveFilter(condition=[IN($25, _UTF-16LE'Daviess County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Ziebach County':VARCHAR(30) CHARACTER SET "UTF-16LE")]) - HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 
00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) + HiveFilter(condition=[IN($25, _UTF-16LE'Daviess County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Ziebach County':VARCHAR(30) CHARACTER SET "UTF-16LE")]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) HiveProject(cs_warehouse_sk=[$13], cs_order_number=[$16]) HiveFilter(condition=[IS NOT NULL($13)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query20.q.out index 455d9e57dd05..c96f9bdb6b8e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query20.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 
8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query21.q.out index 7c1d04844a5f..a6d2b3e4911c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query21.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query21.q.out @@ -3,18 +3,18 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(x.w_warehouse_name=[$0], x.i_item_id=[$1], x.inv_before=[$2], x.inv_after=[$3]) HiveFilter(condition=[AND(CASE(>($2, 0), <=(6.66667E-1, /(CAST($3):DOUBLE, CAST($2):DOUBLE)), false), CASE(>($2, 0), <=(/(CAST($3):DOUBLE, CAST($2):DOUBLE), 1.5E0), false))]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) - HiveProject($f0=[$10], $f1=[$5], $f2=[CASE($7, $3, 0)], $f3=[CASE($8, $3, 0)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$10], $f2=[CASE($7, $5, 0)], $f3=[CASE($8, $5, 0)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out index fb6e9289cc12..a30ba66757bf 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query23.q.out @@ -39,8 +39,8 @@ HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER] HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[316][bigTable=?] in task 'Reducer 15' is a cross product -Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 14' is a cross product +Warning: Map Join MAPJOIN[316][bigTable=?] in task 'Reducer 17' is a cross product +Warning: Map Join MAPJOIN[318][bigTable=?] 
in task 'Reducer 16' is a cross product CBO PLAN: HiveProject(_c0=[$0]) HiveAggregate(group=[{}], agg#0=[sum($0)]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query32.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query32.q.out index f158d7fea5d7..710837eecbe2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query32.q.out @@ -6,18 +6,18 @@ HiveProject(d_date_sk=[$0]) CBO PLAN: HiveProject(excess discount amount=[$0]) HiveAggregate(group=[{}], agg#0=[sum($1)]) - HiveJoin(condition=[AND(=($6, $3), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($6, $4), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14], cs_ext_discount_amt=[$21], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($21), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($13, 269)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], cs_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query37.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query37.q.out index cdac1bab8139..e2ad185eef6f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query37.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query37.q.out @@ -1,14 +1,11 @@ CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) - HiveAggregate(group=[{5, 6, 7}]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{4, 5, 6}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00:TIMESTAMP(9), 2001-08-01 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1]) HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) @@ -16,4 +13,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) 
HiveFilter(condition=[AND(IN($13, 678, 849, 918, 964), BETWEEN(false, $5, 22:DECIMAL(12, 2), 52:DECIMAL(12, 2)))]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00:TIMESTAMP(9), 2001-08-01 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query40.q.out index 6ddf309c5979..582967b597c1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query40.q.out @@ -2,22 +2,22 @@ CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(w_state=[$0], i_item_id=[$1], sales_before=[$2], sales_after=[$3]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) - HiveProject($f0=[$14], $f1=[$9], $f2=[CASE($11, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))], $f3=[CASE($12, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))]) + HiveProject($f0=[$14], $f1=[$12], $f2=[CASE($9, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))], $f3=[CASE($10, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))]) HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($8, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($11, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $5), =($2, $6))], joinType=[left], algorithm=[none], cost=[not available]) 
HiveProject(cs_warehouse_sk=[$13], cs_item_sk=[$14], cs_order_number=[$16], cs_sales_price=[$20], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_refunded_cash=[$22]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(w_warehouse_sk=[$0], w_state=[$10]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query5.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query5.q.out index 4fc7d0ba8c0d..c57a4bf13c24 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query5.q.out @@ -10,9 +10,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(channel=[$0], id=[$1], sales=[$2], 
returns=[$3], profit=[$4]) HiveUnion(all=[true]) HiveProject(channel=[_UTF-16LE'store channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(store_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(store_sk=[$6], date_sk=[$22], sales_price=[$14], profit=[$21], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -21,11 +21,11 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(store_sk=[$6], date_sk=[$19], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$10], net_loss=[$18]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($19))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(s_store_sk=[$0], s_store_id=[$1]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 
00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) @@ -44,9 +44,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wsr_web_site_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(wsr_web_site_sk=[$12], date_sk=[$33], sales_price=[$22], profit=[$32], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -60,9 +60,9 @@ 
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22], wr_returned_date_sk=[$23]) HiveFilter(condition=[IS NOT NULL($23)]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(web_site_sk=[$0], web_site_id=[$1]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_site_id=[$1]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out index a321f8b20706..de415dace201 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query58.q.out @@ -1,24 +1,25 @@ CTE Suggestion: HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveFilter(condition=[sq_count_check($0)]) 
HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) CTE Suggestion: HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 7' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 7' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 8' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) @@ -36,19 +37,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - 
HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -65,19 +66,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 
1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -93,19 +94,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out index 159a27a6ab07..5f68efc24bc6 100644 --- 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query80.q.out @@ -23,72 +23,72 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($0, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1], ss_store_sk=[$6], ss_promo_sk=[$7], ss_ticket_number=[$8], ss_ext_sales_price=[$14], ss_net_profit=[$21], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($7), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8], sr_return_amt=[$10], sr_net_loss=[$18]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - 
HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $7), =($3, $8))], 
joinType=[left], algorithm=[none], cost=[not available]) HiveProject(cs_catalog_page_sk=[$11], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_ext_sales_price=[$22], cs_net_profit=[$32], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($15), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_return_amount=[$17], cr_net_loss=[$25]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], 
agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($0, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_web_site_sk=[$12], ws_promo_sk=[$15], ws_order_number=[$16], ws_ext_sales_price=[$22], ws_net_profit=[$32], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($15), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + 
HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(web_site_sk=[$0], web_site_id=[$1]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query82.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query82.q.out index 9f9be64c934f..44172d451daa 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query82.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query82.q.out @@ -1,14 +1,11 @@ CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) - HiveAggregate(group=[{5, 6, 7}]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{4, 5, 6}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00:TIMESTAMP(9), 2002-07-29 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], 
inv_item_sk=[$1]) HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) @@ -16,4 +13,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) HiveFilter(condition=[AND(IN($13, 129, 437, 663, 727), BETWEEN(false, $5, 30:DECIMAL(12, 2), 60:DECIMAL(12, 2)))]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00:TIMESTAMP(9), 2002-07-29 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query92.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query92.q.out index a5167a68b8a1..90adfb99205a 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query92.q.out @@ -5,27 +5,27 @@ HiveProject(d_date_sk=[$0]) CBO PLAN: HiveProject(excess discount amount=[$0]) - HiveAggregate(group=[{}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[AND(=($5, $3), >($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{}], agg#0=[sum($1)]) + HiveJoin(condition=[AND(=($6, $4), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) 
HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($21), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($13, 269)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], ws_item_sk=[$0]) - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) - HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) - HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], ws_item_sk=[$0]) + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + 
HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query94.q.out index 982bd647bb78..5f14c7b74791 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query94.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) HiveAntiJoin(condition=[=($4, $14)], joinType=[anti]) HiveSemiJoin(condition=[AND(=($4, $14), <>($3, $13))], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_warehouse_sk=[$3], ws_order_number=[$4], ws_ext_ship_cost=[$5], ws_net_profit=[$6], d_date_sk=[$11], d_date=[$12], ca_address_sk=[$7], ca_state=[$8], web_site_sk=[$9], web_company_name=[$10]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + 
HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_warehouse_sk=[$14], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(ws_warehouse_sk=[$14], ws_order_number=[$16]) HiveFilter(condition=[IS NOT NULL($14)]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query95.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query95.q.out index 9d39c369316e..57eaa4112026 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query95.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $3)], agg#1=[sum($4)], agg#2=[sum($5)]) HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_order_number=[$3], ws_ext_ship_cost=[$4], ws_net_profit=[$5], d_date_sk=[$10], d_date=[$11], ca_address_sk=[$6], ca_state=[$7], web_site_sk=[$8], web_company_name=[$9]) - HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_order_number=[$5], ws_ext_ship_cost=[$6], ws_net_profit=[$7], d_date_sk=[$8], d_date=[$9], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$10], web_company_name=[$11]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) 
HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(ws_order_number=[$1]) HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_warehouse_sk=[$14], ws_order_number=[$16]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query98.q.out index 680a11e2bde1..e6db70d26a68 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/cte/cbo_query98.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], 
revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[IS NOT NULL($22)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + 
HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query12.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query12.q.out index ac4f8ced88a8..813daa134207 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query12.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - 
HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query16.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query16.q.out index a6346d4138e8..4e9966445da8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query16.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) HiveAntiJoin(condition=[=($4, $14)], joinType=[anti]) HiveSemiJoin(condition=[AND(=($4, $14), <>($3, $13))], joinType=[semi]) - HiveProject(cs_ship_date_sk=[$0], cs_ship_addr_sk=[$1], cs_call_center_sk=[$2], cs_warehouse_sk=[$3], cs_order_number=[$4], cs_ext_ship_cost=[$5], cs_net_profit=[$6], d_date_sk=[$11], d_date=[$12], ca_address_sk=[$7], ca_state=[$8], cc_call_center_sk=[$9], cc_county=[$10]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not 
available]) + HiveProject(cs_ship_date_sk=[$2], cs_ship_addr_sk=[$3], cs_call_center_sk=[$4], cs_warehouse_sk=[$5], cs_order_number=[$6], cs_ext_ship_cost=[$7], cs_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], cc_call_center_sk=[$11], cc_county=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'NY')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_ship_date_sk=[$1], cs_ship_addr_sk=[$9], cs_call_center_sk=[$10], cs_warehouse_sk=[$13], cs_order_number=[$16], cs_ext_ship_cost=[$27], cs_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($9), IS NOT NULL($1), IS NOT NULL($10))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'NY'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'NY')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) - HiveFilter(condition=[IN($25, _UTF-16LE'Daviess County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Ziebach County':VARCHAR(30) CHARACTER SET "UTF-16LE")]) - HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 
00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-04-01 00:00:00:TIMESTAMP(9), 2001-05-31 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cc_call_center_sk=[$0], cc_county=[$25]) + HiveFilter(condition=[IN($25, _UTF-16LE'Daviess County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Franklin Parish':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Huron County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Levy County':VARCHAR(30) CHARACTER SET "UTF-16LE", _UTF-16LE'Ziebach County':VARCHAR(30) CHARACTER SET "UTF-16LE")]) + HiveTableScan(table=[[default, call_center]], table:alias=[call_center]) HiveProject(cs_warehouse_sk=[$13], cs_order_number=[$16]) HiveFilter(condition=[IS NOT NULL($13)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[cs2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query20.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query20.q.out index 455d9e57dd05..c96f9bdb6b8e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query20.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC], fetch=[100]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - 
HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) HiveFilter(condition=[IS NOT NULL($33)]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query21.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query21.q.out index 7c1d04844a5f..a6d2b3e4911c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query21.q.out @@ -3,18 +3,18 @@ 
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(x.w_warehouse_name=[$0], x.i_item_id=[$1], x.inv_before=[$2], x.inv_after=[$3]) HiveFilter(condition=[AND(CASE(>($2, 0), <=(6.66667E-1, /(CAST($3):DOUBLE, CAST($2):DOUBLE)), false), CASE(>($2, 0), <=(/(CAST($3):DOUBLE, CAST($2):DOUBLE), 1.5E0), false))]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) - HiveProject($f0=[$10], $f1=[$5], $f2=[CASE($7, $3, 0)], $f3=[CASE($8, $3, 0)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) - HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f0=[$1], $f1=[$10], $f2=[CASE($7, $5, 0)], $f3=[CASE($8, $5, 0)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(w_warehouse_sk=[$0], w_warehouse_name=[$2]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) + HiveJoin(condition=[=($7, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(inv_date_sk=[$0], inv_item_sk=[$1], inv_warehouse_sk=[$2], inv_quantity_on_hand=[$3]) + 
HiveTableScan(table=[[default, inventory]], table:alias=[inventory]) + HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query32.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query32.q.out index 8ccb8381e302..4017680294ad 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query32.q.out @@ -1,18 +1,18 @@ CBO PLAN: HiveProject(excess discount amount=[$0]) HiveAggregate(group=[{}], agg#0=[sum($1)]) - HiveJoin(condition=[AND(=($6, $3), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[AND(=($6, $4), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14], cs_ext_discount_amt=[$21], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($21), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($13, 269)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - 
HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], cs_item_sk=[$0]) HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query37.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query37.q.out index cdac1bab8139..e2ad185eef6f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query37.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query37.q.out @@ -1,14 +1,11 @@ CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) - HiveAggregate(group=[{5, 6, 7}]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{4, 5, 6}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cs_item_sk=[$14]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00:TIMESTAMP(9), 2001-08-01 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($6, 
$0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1]) HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) @@ -16,4 +13,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) HiveFilter(condition=[AND(IN($13, 678, 849, 918, 964), BETWEEN(false, $5, 22:DECIMAL(12, 2), 52:DECIMAL(12, 2)))]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-06-02 00:00:00:TIMESTAMP(9), 2001-08-01 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query40.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query40.q.out index 6ddf309c5979..582967b597c1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query40.q.out @@ -2,22 +2,22 @@ CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(w_state=[$0], i_item_id=[$1], sales_before=[$2], sales_after=[$3]) HiveAggregate(group=[{0, 1}], agg#0=[sum($2)], agg#1=[sum($3)]) - HiveProject($f0=[$14], $f1=[$9], $f2=[CASE($11, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))], $f3=[CASE($12, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))]) + HiveProject($f0=[$14], $f1=[$12], $f2=[CASE($9, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))], $f3=[CASE($10, -($3, CASE(IS NOT NULL($7), $7, 0:DECIMAL(12, 2))), 0:DECIMAL(13, 2))]) HiveJoin(condition=[=($0, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], 
cost=[not available]) - HiveJoin(condition=[=($8, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($11, $1)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $8)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $5), =($2, $6))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(cs_warehouse_sk=[$13], cs_item_sk=[$14], cs_order_number=[$16], cs_sales_price=[$20], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($13), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_refunded_cash=[$22]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], EXPR$0=[<($2, 1998-04-08)], EXPR$1=[>=($2, 1998-04-08)]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-09 00:00:00:TIMESTAMP(9), 1998-05-08 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveFilter(condition=[BETWEEN(false, $5, 0.99:DECIMAL(3, 2), 1.49:DECIMAL(3, 2))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(w_warehouse_sk=[$0], w_state=[$10]) HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query5.q.out 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query5.q.out index 3c0a718cb8fb..647a8170af6d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query5.q.out @@ -5,9 +5,9 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(channel=[$0], id=[$1], sales=[$2], returns=[$3], profit=[$4]) HiveUnion(all=[true]) HiveProject(channel=[_UTF-16LE'store channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'store':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(store_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(store_sk=[$6], date_sk=[$22], sales_price=[$14], profit=[$21], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -16,15 +16,15 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(store_sk=[$6], date_sk=[$19], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$10], net_loss=[$18]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($19))]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(s_store_sk=[$0], s_store_id=[$1]) - 
HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_store_id=[$1]) + HiveTableScan(table=[[default, store]], table:alias=[store]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(page_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(page_sk=[$11], date_sk=[$33], sales_price=[$22], profit=[$32], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -33,15 +33,15 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(page_sk=[$11], date_sk=[$26], sales_price=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], profit=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], return_amt=[$17], net_loss=[$25]) 
HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($26))]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) - HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) + HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$3], profit=[-($2, $4)]) - HiveAggregate(group=[{7}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) - HiveJoin(condition=[=($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{8}], agg#0=[sum($2)], agg#1=[sum($3)], agg#2=[sum($4)], agg#3=[sum($5)]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wsr_web_site_sk=[$0], date_sk=[$1], sales_price=[$2], profit=[$3], return_amt=[$4], net_loss=[$5]) HiveUnion(all=[true]) HiveProject(wsr_web_site_sk=[$12], date_sk=[$33], sales_price=[$22], profit=[$32], return_amt=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)], net_loss=[CAST(0:DECIMAL(7, 2)):DECIMAL(7, 2)]) @@ -55,9 +55,9 @@ 
HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22], wr_returned_date_sk=[$23]) HiveFilter(condition=[IS NOT NULL($23)]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(web_site_sk=[$0], web_site_id=[$1]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-08-18 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_site_id=[$1]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out index 9664e4762b89..de98d243fa41 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] 
in task 'Reducer 6' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(ss_items.item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) @@ -16,19 +17,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), 
BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) @@ -45,19 +46,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -73,19 +74,19 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(d_date=[$0]) HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], 
table:alias=[date_dim]) HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) HiveFilter(condition=[sq_count_check($0)]) HiveProject(cnt=[$0]) HiveAggregate(group=[{}], cnt=[COUNT()]) HiveFilter(condition=[=($2, 1998-02-19)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query80.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query80.q.out index a518625aad5e..2fea65b34ef5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query80.q.out @@ -8,72 +8,72 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + 
HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($0, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1], ss_store_sk=[$6], ss_promo_sk=[$7], ss_ticket_number=[$8], ss_ext_sales_price=[$14], ss_net_profit=[$21], ss_sold_date_sk=[$22]) HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($7), IS NOT NULL($22))]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) HiveProject(sr_item_sk=[$1], sr_ticket_number=[$8], sr_return_amt=[$10], sr_net_loss=[$18]) HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(s_store_sk=[$0], s_store_id=[$1]) HiveTableScan(table=[[default, store]], 
table:alias=[store]) HiveProject(channel=[_UTF-16LE'catalog channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'catalog_page':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($0, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[AND(=($1, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(cs_catalog_page_sk=[$11], cs_item_sk=[$14], cs_promo_sk=[$15], cs_order_number=[$16], cs_ext_sales_price=[$22], cs_net_profit=[$32], cs_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($15), IS NOT NULL($33))]) HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) HiveProject(cr_item_sk=[$1], cr_order_number=[$15], cr_return_amount=[$17], cr_net_loss=[$25]) HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], 
table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(cp_catalog_page_sk=[$0], cp_catalog_page_id=[$1]) HiveTableScan(table=[[default, catalog_page]], table:alias=[catalog_page]) HiveProject(channel=[_UTF-16LE'web channel':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"], id=[||(_UTF-16LE'web_site':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", $0)], sales=[$1], returns=[$2], profit=[$3]) HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[sum($2)], agg#2=[sum($3)]) HiveProject($f0=[$15], $f1=[$4], $f2=[CASE(IS NOT NULL($9), $9, 0:DECIMAL(12, 2))], $f3=[-($5, CASE(IS NOT NULL($10), $10, 0:DECIMAL(12, 2)))]) HiveJoin(condition=[=($1, $14)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($6, $13)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $12)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $13)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $12)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($6, $11)], joinType=[inner], algorithm=[none], cost=[not available]) 
HiveJoin(condition=[AND(=($0, $7), =($3, $8))], joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_web_site_sk=[$12], ws_promo_sk=[$15], ws_order_number=[$16], ws_ext_sales_price=[$22], ws_net_profit=[$32], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($12), IS NOT NULL($15), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) HiveProject(wr_item_sk=[$1], wr_order_number=[$12], wr_return_amt=[$14], wr_net_loss=[$22]) HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(p_promo_sk=[$0]) - HiveFilter(condition=[=($11, _UTF-16LE'N')]) - HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-08-04 00:00:00:TIMESTAMP(9), 1998-09-03 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[>($5, 50:DECIMAL(2, 0))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(p_promo_sk=[$0]) + HiveFilter(condition=[=($11, _UTF-16LE'N')]) + HiveTableScan(table=[[default, promotion]], table:alias=[promotion]) HiveProject(web_site_sk=[$0], web_site_id=[$1]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query82.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query82.q.out index 9f9be64c934f..44172d451daa 100644 --- 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query82.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query82.q.out @@ -1,14 +1,11 @@ CBO PLAN: HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], i_item_desc=[$1], i_current_price=[$2]) - HiveAggregate(group=[{5, 6, 7}]) - HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{4, 5, 6}]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00:TIMESTAMP(9), 2002-07-29 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(inv_date_sk=[$0], inv_item_sk=[$1]) HiveFilter(condition=[BETWEEN(false, $3, 100, 500)]) @@ -16,4 +13,7 @@ HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100]) HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5]) HiveFilter(condition=[AND(IN($13, 129, 437, 663, 727), BETWEEN(false, $5, 30:DECIMAL(12, 2), 60:DECIMAL(12, 2)))]) HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2002-05-30 00:00:00:TIMESTAMP(9), 2002-07-29 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query92.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query92.q.out index 
29e3cfdc3ff4..5e5810049a88 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query92.q.out @@ -1,26 +1,26 @@ CBO PLAN: HiveProject(excess discount amount=[$0]) - HiveAggregate(group=[{}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveJoin(condition=[AND(=($5, $3), >($1, $4))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{}], agg#0=[sum($1)]) + HiveJoin(condition=[AND(=($6, $4), >($1, $5))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) HiveFilter(condition=[AND(IS NOT NULL($21), IS NOT NULL($33))]) HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(i_item_sk=[$0]) - HiveFilter(condition=[=($13, 269)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], ws_item_sk=[$0]) - HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) - HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) - HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], 
table:alias=[web_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveFilter(condition=[=($13, 269)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(_o__c0=[*(1.3:DECIMAL(2, 1), CAST(/($1, $2)):DECIMAL(11, 6))], ws_item_sk=[$0]) + HiveFilter(condition=[IS NOT NULL(CAST(/($1, $2)):DECIMAL(11, 6))]) + HiveAggregate(group=[{0}], agg#0=[sum($1)], agg#1=[count($1)]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_ext_discount_amt=[$21], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00:TIMESTAMP(9), 1998-06-16 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query94.q.out index 982bd647bb78..5f14c7b74791 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query94.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)]) HiveAntiJoin(condition=[=($4, $14)], joinType=[anti]) HiveSemiJoin(condition=[AND(=($4, $14), 
<>($3, $13))], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_warehouse_sk=[$3], ws_order_number=[$4], ws_ext_ship_cost=[$5], ws_net_profit=[$6], d_date_sk=[$11], d_date=[$12], ca_address_sk=[$7], ca_state=[$8], web_site_sk=[$9], web_company_name=[$10]) - HiveJoin(condition=[=($0, $11)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12]) + HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_warehouse_sk=[$14], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) 
CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(ws_warehouse_sk=[$14], ws_order_number=[$16]) HiveFilter(condition=[IS NOT NULL($14)]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out index 9d39c369316e..57eaa4112026 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out @@ -3,22 +3,22 @@ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2]) HiveAggregate(group=[{}], agg#0=[count(DISTINCT $3)], agg#1=[sum($4)], agg#2=[sum($5)]) HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_order_number=[$3], ws_ext_ship_cost=[$4], ws_net_profit=[$5], d_date_sk=[$10], d_date=[$11], ca_address_sk=[$6], ca_state=[$7], web_site_sk=[$8], web_company_name=[$9]) - HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], 
cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_order_number=[$5], ws_ext_ship_cost=[$6], ws_net_profit=[$7], d_date_sk=[$8], d_date=[$9], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$10], web_company_name=[$11]) + HiveJoin(condition=[=($4, $10)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($8, _UTF-16LE'TX')]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($8, _UTF-16LE'TX')]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) - HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) - HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0], 
d_date=[$2]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00:TIMESTAMP(9), 1999-06-30 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) + HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(ws_order_number=[$1]) HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_warehouse_sk=[$14], ws_order_number=[$16]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query98.q.out index 680a11e2bde1..e6db70d26a68 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query98.q.out @@ -2,16 +2,16 @@ CBO PLAN: HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5]) HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC]) HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, 100:DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING))], (tok_table_or_col i_item_id)=[$0]) - HiveAggregate(group=[{4, 5, 6, 7, 8}], agg#0=[sum($1)]) - HiveJoin(condition=[=($2, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveAggregate(group=[{5, 6, 7, 8, 9}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], 
joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$1], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) HiveFilter(condition=[IS NOT NULL($22)]) HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) - HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00:TIMESTAMP(9), 2001-02-11 00:00:00:TIMESTAMP(9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12]) + HiveFilter(condition=[IN($12, _UTF-16LE'Books', _UTF-16LE'Jewelry', _UTF-16LE'Sports')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query12.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query12.q.out index 8d5eeb1dcade..973a5e460ab8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query12.q.out @@ -17,7 +17,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_sales - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_56_container, bigKeyColName:ws_item_sk, smallTablePos:1, keyRatio:0.2727272808816537 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_52_container, bigKeyColName:ws_item_sk, smallTablePos:1, keyRatio:0.030300956815565664 Statistics: Num rows: 21594638446 Data size: 2763811113552 
Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) @@ -27,26 +27,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1 input vertices: 1 Map 5 - Statistics: Num rows: 5889447025 Data size: 4110531380410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2399240019 Data size: 287606194744 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col5, _col6, _col7, _col8, _col9 input vertices: 1 Map 6 Statistics: Num rows: 654338207 Data size: 451190719790 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) - keys: _col8 (type: char(50)), _col7 (type: char(50)), _col4 (type: string), _col5 (type: varchar(200)), _col6 (type: decimal(7,2)) - minReductionHashAggr: 0.98058045 + keys: _col9 (type: char(50)), _col8 (type: char(50)), _col5 (type: string), _col6 (type: varchar(200)), _col7 (type: decimal(7,2)) + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 126000 Data size: 86940000 Basic stats: COMPLETE Column stats: COMPLETE @@ -60,28 +60,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - 
Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 6 Map Operator Tree: TableScan alias: date_dim @@ -118,6 +96,28 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: 
+ + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query16.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query16.q.out index 9cd4986a6e30..0fe6be75d613 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query16.q.out @@ -23,7 +23,7 @@ STAGE PLANS: TableScan alias: cs1 filterExpr: (cs_ship_addr_sk is not null and cs_ship_date_sk is not null and cs_call_center_sk is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_119_container, bigKeyColName:cs_call_center_sk, smallTablePos:1, keyRatio:1.8509578697501366E-10 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_120_container, bigKeyColName:cs_call_center_sk, smallTablePos:1, keyRatio:4.13026255875998E-4 Statistics: Num rows: 43220864887 Data size: 11379157992136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (cs_ship_addr_sk is not null and cs_ship_date_sk is not null and cs_call_center_sk is not null) (type: boolean) @@ -36,27 +36,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 8 - Statistics: Num rows: 803365808 Data size: 176672786488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4730608045 Data size: 1182045115232 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator 
condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col4, _col5, _col6 + outputColumnNames: _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 9 - Statistics: Num rows: 160673164 Data size: 19280779808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 89256757 Data size: 10710810968 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col3, _col4, _col5, _col6 input vertices: @@ -89,22 +89,22 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: call_center + filterExpr: (cc_county) IN ('Daviess County', 'Franklin Parish', 'Huron County', 'Levy County', 'Ziebach County') (type: boolean) + Statistics: Num rows: 60 Data size: 6360 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (cc_county) IN ('Daviess County', 'Franklin Parish', 'Huron County', 'Levy County', 'Ziebach County') (type: boolean) + Statistics: Num rows: 12 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint) + expressions: cc_call_center_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 @@ -165,43 +165,43 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_state = 'NY') (type: boolean) - Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ca_state = 'NY') (type: boolean) - Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'2001-04-01 00:00:00' AND TIMESTAMP'2001-05-31 00:00:00' (type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ca_address_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan - alias: call_center - 
filterExpr: (cc_county) IN ('Daviess County', 'Franklin Parish', 'Huron County', 'Levy County', 'Ziebach County') (type: boolean) - Statistics: Num rows: 60 Data size: 6360 Basic stats: COMPLETE Column stats: COMPLETE + alias: customer_address + filterExpr: (ca_state = 'NY') (type: boolean) + Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cc_county) IN ('Daviess County', 'Franklin Parish', 'Huron County', 'Levy County', 'Ziebach County') (type: boolean) - Statistics: Num rows: 12 Data size: 1272 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (ca_state = 'NY') (type: boolean) + Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cc_call_center_sk (type: bigint) + expressions: ca_address_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query20.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query20.q.out index e7dac6389994..2c85d5ee0f7b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query20.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query20.q.out @@ -17,7 +17,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: catalog_sales - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_56_container, bigKeyColName:cs_item_sk, 
smallTablePos:1, keyRatio:0.2727272808721824 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_52_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:0.030300956805910575 Statistics: Num rows: 43005109025 Data size: 5492607208208 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) @@ -27,26 +27,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1 input vertices: 1 Map 5 - Statistics: Num rows: 11728666448 Data size: 8174562398208 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4778018342 Data size: 561315454048 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col5, _col6, _col7, _col8, _col9 input vertices: 1 Map 6 Statistics: Num rows: 1303095951 Data size: 887089423694 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) - keys: _col8 (type: char(50)), _col7 (type: char(50)), _col4 (type: string), _col5 (type: varchar(200)), _col6 (type: decimal(7,2)) - minReductionHashAggr: 0.99 + keys: _col9 (type: char(50)), _col8 (type: char(50)), _col5 (type: string), _col6 (type: varchar(200)), _col7 (type: decimal(7,2)) + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 126000 Data size: 86940000 Basic stats: COMPLETE Column stats: COMPLETE @@ -60,28 +60,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_category) IN ('Books ', 
'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 6 Map Operator Tree: TableScan alias: date_dim @@ -118,6 +96,28 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query21.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query21.q.out index b8ecf93fa13f..2220fbc9ec45 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query21.q.out @@ -16,7 +16,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_74_container, bigKeyColName:inv_item_sk, smallTablePos:1, keyRatio:2.457218293744475E-9 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_75_container, bigKeyColName:inv_item_sk, smallTablePos:1, keyRatio:0.0015429438826629121 Statistics: Num rows: 1627857000 Data size: 45254407088 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: inv_date_sk (type: bigint), inv_item_sk (type: bigint), inv_warehouse_sk (type: bigint), inv_quantity_on_hand (type: int) @@ -26,94 +26,94 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col5 + outputColumnNames: _col1, _col2, _col3, _col5, _col6 input vertices: 1 Map 4 - Statistics: Num rows: 22606776 Data size: 2622386020 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 180860619 Data size: 4738508420 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col5, _col7, _col8 + outputColumnNames: _col2, _col3, _col5, _col6, _col8 input vertices: 1 Map 5 - Statistics: Num rows: 2511693 Data size: 291356392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2511692 Data size: 291356276 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col3, _col5, _col7, _col8, _col10 + outputColumnNames: _col3, _col5, _col6, _col8, _col10 input vertices: 1 Map 6 - Statistics: Num rows: 2511693 Data size: 522432148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2511692 Data size: 522431940 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col10 (type: varchar(20)), _col5 (type: string), if(_col7, _col3, 0) (type: int), if(_col8, _col3, 0) (type: int) + expressions: _col10 (type: varchar(20)), _col8 (type: string), if(_col5, _col3, 0) (type: int), if(_col6, _col3, 0) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2511693 Data size: 522432148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2511692 Data size: 522431940 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col3) keys: _col0 (type: varchar(20)), _col1 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.9867269 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 57753 Data size: 12474648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 519696 Data size: 112254336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: varchar(20)), _col1 
(type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: varchar(20)), _col1 (type: string) - Statistics: Num rows: 57753 Data size: 12474648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 519696 Data size: 112254336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 4 Map Operator Tree: TableScan - alias: item - filterExpr: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) - Statistics: Num rows: 462000 Data size: 101509408 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) - Statistics: Num rows: 6416 Data size: 1409840 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' (type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE + expressions: d_date_sk (type: bigint), (d_date < DATE'1998-04-08') (type: boolean), (d_date >= DATE'1998-04-08') (type: boolean) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8116 Data size: 129856 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column 
stats: COMPLETE - value expressions: _col1 (type: string) + Statistics: Num rows: 8116 Data size: 129856 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: boolean), _col2 (type: boolean) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: item + filterExpr: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) + Statistics: Num rows: 462000 Data size: 101509408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-09 00:00:00' AND TIMESTAMP'1998-05-08 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) + Statistics: Num rows: 6416 Data size: 1409840 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint), (d_date < DATE'1998-04-08') (type: boolean), (d_date >= DATE'1998-04-08') (type: boolean) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8116 Data size: 129856 Basic stats: COMPLETE Column stats: COMPLETE + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 129856 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: boolean), _col2 (type: boolean) + Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 6 @@ -142,21 +142,21 @@ STAGE PLANS: keys: KEY._col0 (type: varchar(20)), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19251 Data size: 4158216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 173232 Data size: 37418112 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (if((_col2 > 0L), (0.666667D <= (UDFToDouble(_col3) / UDFToDouble(_col2))), false) and if((_col2 > 0L), ((UDFToDouble(_col3) / UDFToDouble(_col2)) <= 1.5D), false)) (type: boolean) - Statistics: Num rows: 4812 Data size: 1039392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43308 Data size: 9354528 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: varchar(20)), _col1 (type: string) null sort order: zz - Statistics: Num rows: 4812 Data size: 1039392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43308 Data size: 9354528 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Reduce Output Operator key expressions: _col0 (type: varchar(20)), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 4812 Data size: 1039392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43308 Data size: 9354528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -164,7 +164,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: varchar(20)), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4812 Data size: 1039392 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 43308 Data size: 9354528 Basic stats: COMPLETE 
Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 21600 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out index 1e33338c615a..bc67018f6574 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out @@ -358,13 +358,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 33166566982 Data size: 2387992822704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 66333133964 Data size: 4775985645408 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 33166566982 Data size: 2387992822704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 66333133964 Data size: 4775985645408 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -396,11 +396,11 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 16583283491 Data size: 1193996411352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 545240156 Data size: 39257291232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col2 - Statistics: Num rows: 16583283491 Data size: 265332535856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 545240156 Data size: 8723842496 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition 
map: Inner Join 0 to 1 @@ -410,36 +410,36 @@ STAGE PLANS: outputColumnNames: _col2, _col3 input vertices: 1 Map 5 - Statistics: Num rows: 16583283491 Data size: 265332535856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 545240156 Data size: 8723842496 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: bigint), _col2 (type: bigint) outputColumnNames: _col0, _col2 - Statistics: Num rows: 16583283491 Data size: 265332535856 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 545240156 Data size: 8723842496 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 > 4L) (type: boolean) - Statistics: Num rows: 5527761163 Data size: 88444178608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 181746718 Data size: 2907947488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 5527761163 Data size: 44222089304 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 181746718 Data size: 1453973744 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: bigint) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 64255141 Data size: 514041128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2228502 Data size: 17828016 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 64255141 Data size: 514041128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2228502 Data size: 17828016 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 64255141 Data 
size: 514041128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2228502 Data size: 17828016 Basic stats: COMPLETE Column stats: COMPLETE Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query32.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query32.q.out index 5423fb1d2107..1b634445238c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query32.q.out @@ -8,7 +8,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Map 5 <- Map 4 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 1 (BROADCAST_EDGE), Map 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) @@ -19,7 +19,7 @@ STAGE PLANS: TableScan alias: catalog_sales filterExpr: cs_ext_discount_amt is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_91_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:0.0010104727318500269 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_87_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:1.1226707964380053E-4 Statistics: Num rows: 43005109025 Data size: 5492699040592 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cs_ext_discount_amt is not null (type: boolean) @@ -32,35 +32,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3 + outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 43455490 Data size: 695287952 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4766159119 Data size: 560013852168 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator 
condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col3 + outputColumnNames: _col1, _col4 input vertices: 1 Map 4 Statistics: Num rows: 4828058 Data size: 38624576 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: bigint) + key expressions: _col4 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: bigint) + Map-reduce partition columns: _col4 (type: bigint) Statistics: Num rows: 4828058 Data size: 38624576 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Select Operator - expressions: _col3 (type: bigint) - outputColumnNames: _col3 + expressions: _col4 (type: bigint) + outputColumnNames: _col4 Statistics: Num rows: 4828058 Data size: 38624464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col3), max(_col3), bloom_filter(_col3, expectedEntries=1000000) + aggregations: min(_col4), max(_col4), bloom_filter(_col4, expectedEntries=1000000) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -73,27 +73,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 3 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_manufact_id = 269) (type: boolean) - Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i_manufact_id = 269) (type: boolean) - Statistics: Num rows: 468 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 468 
Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 4 Map Operator Tree: TableScan alias: date_dim @@ -134,6 +113,43 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cs_sold_date_sk (bigint) + Target Input: catalog_sales + Partition key expr: cs_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 5 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_manufact_id = 269) (type: boolean) + Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_manufact_id = 269) (type: boolean) + Statistics: Num rows: 468 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 @@ -157,7 +173,7 @@ STAGE PLANS: 1 _col0 (type: bigint) 
outputColumnNames: _col0, _col1 input vertices: - 1 Map 4 + 1 Map 3 Statistics: Num rows: 4778018342 Data size: 561407286432 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count(_col1) @@ -208,7 +224,7 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: bigint) + 0 _col4 (type: bigint) 1 _col1 (type: bigint) outputColumnNames: _col1, _col5 input vertices: diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query38.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query38.q.out index 9a35919c5a20..a56673a3e5b2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query38.q.out @@ -251,13 +251,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2122508751 Data size: 500912065236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 2122508751 Data size: 500912065236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: @@ -265,30 +265,30 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061254375 Data size: 250456032500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061254375 Data size: 250456032500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 265313593 Data size: 64736516692 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1035784270732 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col3) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Reducer 2 Execution mode: vectorized, llap @@ -309,13 +309,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7919716636 Data size: 1869053126096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15839433273 Data size: 3738106252428 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator 
key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 7919716636 Data size: 1869053126096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15839433273 Data size: 3738106252428 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: @@ -323,30 +323,30 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3959858318 Data size: 934526563048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2238783746216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3959858318 Data size: 934526563048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2238783746216 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 989964579 Data size: 241551357276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2314674720664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col3) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE 
Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -356,11 +356,11 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 444670820 Data size: 108499680080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2314674720664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: bigint) outputColumnNames: _col3 - Statistics: Num rows: 444670820 Data size: 3557366560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 75890974448 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col3 = 3L) (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -411,13 +411,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4187240873 Data size: 988188846028 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 4187240873 Data size: 988188846028 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: @@ -425,30 +425,30 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2093620436 Data size: 494094422896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2093620436 Data size: 494094422896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 523405109 Data size: 127710846596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 2043373546024 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col3) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 
889341640 Data size: 216999360160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 22105871055 Data size: 5393832537420 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query40.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query40.q.out index 005a3f992fd5..a30c659f78b2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query40.q.out @@ -7,22 +7,22 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 7 (BROADCAST_EDGE) - Map 5 <- Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Map 1 <- Reducer 8 (BROADCAST_EDGE) + Map 5 <- Reducer 8 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: catalog_sales - filterExpr: (cs_warehouse_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (cs_warehouse_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 43005109025 Data size: 6179957594616 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: 
(cs_warehouse_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (cs_warehouse_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 42897418825 Data size: 6164482203784 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cs_warehouse_sk (type: bigint), cs_item_sk (type: bigint), cs_order_number (type: bigint), cs_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) @@ -41,10 +41,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: catalog_returns - filterExpr: (cr_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (cr_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 4320980099 Data size: 543456366240 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cr_item_sk BETWEEN DynamicValue(RS_17_item_i_item_sk_min) AND DynamicValue(RS_17_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_17_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (cr_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 4320980099 Data size: 543456366240 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cr_item_sk (type: bigint), cr_order_number 
(type: bigint), cr_refunded_cash (type: decimal(7,2)) @@ -60,43 +60,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 6 - Map Operator Tree: - TableScan - alias: item - filterExpr: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) - Statistics: Num rows: 462000 Data size: 101509408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) - Statistics: Num rows: 6416 Data size: 1409840 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 6416 Data size: 51328 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 8 Map Operator Tree: TableScan alias: date_dim @@ -134,6 +97,43 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + 
TableScan + alias: item + filterExpr: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) + Statistics: Num rows: 462000 Data size: 101509408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: i_current_price BETWEEN 0.99 AND 1.49 (type: boolean) + Statistics: Num rows: 6416 Data size: 1409840 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6416 Data size: 692928 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 6416 Data size: 51328 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan @@ -170,21 +170,21 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col4 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col4, _col7, _col9 + outputColumnNames: _col0, _col1, _col3, _col7, _col9, _col10 input vertices: 1 Map 6 - Statistics: Num rows: 947588639 Data size: 203304102788 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7580978039 Data size: 1018266906400 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col4 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col7, _col9, _col11, _col12 + outputColumnNames: _col0, _col3, _col7, _col9, _col10, _col12 input vertices: - 1 Map 8 + 1 Map 7 Statistics: Num rows: 105280419 Data size: 11370285484 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -192,18 +192,18 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col3, _col7, _col9, _col11, _col12, _col14 + outputColumnNames: _col3, _col7, _col9, _col10, _col12, _col14 input vertices: 1 Map 9 Statistics: Num rows: 105280419 Data size: 20424401510 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ - keys: _col14 (type: char(2)), _col9 (type: string) + keys: _col14 (type: char(2)), _col12 (type: string) null sort order: zz Statistics: Num rows: 105280419 Data size: 20424401510 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Select Operator - expressions: _col14 (type: char(2)), _col9 (type: string), if(_col11, (_col3 - if(_col7 is not null, _col7, 0)), 0) (type: decimal(8,2)), if(_col12, (_col3 - if(_col7 is not null, _col7, 0)), 0) (type: decimal(8,2)) + expressions: _col14 (type: char(2)), _col12 (type: string), if(_col9, (_col3 - if(_col7 is not null, _col7, 0)), 0) (type: decimal(8,2)), if(_col10, (_col3 - if(_col7 is not null, _col7, 0)), 0) (type: decimal(8,2)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 105280419 Data size: 20424401510 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -212,13 +212,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 684480 Data size: 280636800 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 6159360 Data size: 2525337600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(2)), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: char(2)), _col1 (type: string) - Statistics: Num rows: 684480 Data size: 280636800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6159360 Data size: 2525337600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(18,2)), _col3 (type: decimal(18,2)) Reducer 3 Execution mode: vectorized, llap @@ -228,12 +228,12 @@ STAGE PLANS: keys: KEY._col0 (type: char(2)), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8556 Data size: 3507960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 76992 Data size: 31566720 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(2)), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 8556 Data size: 3507960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 76992 Data size: 31566720 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(18,2)), _col3 (type: decimal(18,2)) Reducer 4 Execution mode: vectorized, llap @@ -241,7 +241,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: char(2)), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(18,2)), VALUE._col1 (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8556 Data size: 3507960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 76992 Data size: 31566720 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 41000 Basic stats: COMPLETE Column stats: COMPLETE @@ -252,7 +252,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query5.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query5.q.out index adbdacef39ef..ea66858b591b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query5.q.out @@ -7,11 +7,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 21 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 12 <- Map 13 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 10 (CONTAINS) + Map 1 <- Map 20 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 12 <- Map 13 (BROADCAST_EDGE), Map 20 (BROADCAST_EDGE), Union 10 (CONTAINS) Map 14 <- Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 15 (CONTAINS) - Map 7 <- Map 21 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 9 <- Map 13 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 10 (CONTAINS) + Map 7 <- Map 20 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Union 2 (CONTAINS) + Map 9 <- Map 13 (BROADCAST_EDGE), Map 20 (BROADCAST_EDGE), Union 10 (CONTAINS) Reducer 11 <- Union 10 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 16 <- Union 15 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 18 <- Map 17 (CUSTOM_SIMPLE_EDGE), Map 19 (CUSTOM_SIMPLE_EDGE), Map 20 (BROADCAST_EDGE), Map 21 (BROADCAST_EDGE), Union 15 (CONTAINS) @@ -25,7 +25,7 @@ STAGE PLANS: TableScan alias: store_sales filterExpr: ss_store_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_231_container, bigKeyColName:ss_store_sk, smallTablePos:1, keyRatio:1.0756178660512734 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_232_container, 
bigKeyColName:ss_store_sk, smallTablePos:1, keyRatio:0.11950491485837746 Statistics: Num rows: 82510879939 Data size: 19351122693824 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ss_store_sk is not null (type: boolean) @@ -38,35 +38,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: - 1 Map 8 - Statistics: Num rows: 88750176606 Data size: 48460575985864 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 20 + Statistics: Num rows: 9860455682 Data size: 3580319207704 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: - 1 Map 21 + 1 Map 8 Statistics: Num rows: 9860455682 Data size: 4519007506664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1729994 Data size: 948036712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15516987 Data size: 8503308876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1729994 Data size: 948036712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15516987 Data size: 8503308876 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: 
decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -87,35 +87,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: - 1 Map 13 - Statistics: Num rows: 47131652878 Data size: 26162171562344 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 20 + Statistics: Num rows: 5236491827 Data size: 2342411162624 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: - 1 Map 21 + 1 Map 13 Statistics: Num rows: 5236491827 Data size: 2826570083372 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 56303158 Data size: 30854130584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 506728422 Data size: 277687175256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 56303158 Data size: 30854130584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 506728422 Data size: 277687175256 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -149,7 
+149,7 @@ STAGE PLANS: TableScan alias: web_sales filterExpr: ws_web_site_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_235_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:2.1458761134564632 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_236_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:0.23841435728939733 Statistics: Num rows: 21594638446 Data size: 5182388988880 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ws_web_site_sk is not null (type: boolean) @@ -162,35 +162,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 20 - Statistics: Num rows: 46339418820 Data size: 25753225754752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5148471846 Data size: 2336125623824 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: 1 Map 21 Statistics: Num rows: 5148471846 Data size: 2809871462440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 54885 Data size: 30076980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 461034 Data size: 252646632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - 
Statistics: Num rows: 54885 Data size: 30076980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 461034 Data size: 252646632 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -235,31 +235,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 20 - Map Operator Tree: - TableScan - alias: web_site - Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: web_site_sk (type: bigint), web_site_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 21 Map Operator Tree: TableScan alias: date_dim @@ -406,6 +381,31 @@ STAGE PLANS: Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 21 + Map Operator Tree: + TableScan + alias: web_site + Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: web_site_sk (type: bigint), web_site_id (type: string) + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 84 Data size: 9072 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Map 7 Map Operator Tree: TableScan @@ -423,35 +423,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: - 1 Map 8 - Statistics: Num rows: 88750176606 Data size: 48460575985864 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 20 + Statistics: Num rows: 9860455682 Data size: 3580319207704 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: - 1 Map 21 + 1 Map 8 Statistics: Num rows: 9860455682 Data size: 4519007506664 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1729994 Data size: 948036712 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 15516987 Data size: 8503308876 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1729994 Data size: 948036712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15516987 Data size: 8503308876 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -485,7 +485,7 @@ STAGE PLANS: TableScan alias: catalog_sales filterExpr: cs_catalog_page_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_233_container, bigKeyColName:cs_catalog_page_sk, smallTablePos:1, keyRatio:1.0959547352990346 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_234_container, bigKeyColName:cs_catalog_page_sk, smallTablePos:1, keyRatio:0.12176441231565975 Statistics: Num rows: 43005109025 Data size: 10308315074584 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cs_catalog_page_sk is not null (type: boolean) @@ -498,35 +498,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: - 1 Map 13 - Statistics: Num rows: 47131652878 Data size: 26162171562344 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 20 + Statistics: Num rows: 5236491827 Data size: 2342411162624 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: 
- 1 Map 21 + 1 Map 13 Statistics: Num rows: 5236491827 Data size: 2826570083372 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 56303158 Data size: 30854130584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 506728422 Data size: 277687175256 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 56303158 Data size: 30854130584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 506728422 Data size: 277687175256 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -538,16 +538,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5099 Data size: 2794252 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45891 Data size: 25148268 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'catalog channel' (type: string), concat('catalog_page', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5099 Data size: 3156281 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45891 Data size: 28406529 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: string), _col1 (type: string) null sort order: zz - Statistics: Num 
rows: 5202 Data size: 3219822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46812 Data size: 28974702 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) @@ -556,13 +556,13 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 16 Execution mode: vectorized, llap @@ -572,16 +572,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 2740 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 42 Data size: 23016 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'web channel' (type: string), concat('web_site', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 3075 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 42 Data size: 25830 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: string), _col1 (type: string) null sort order: zz - Statistics: 
Num rows: 5202 Data size: 3219822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46812 Data size: 28974702 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) @@ -590,13 +590,13 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 18 Execution mode: vectorized, llap @@ -620,35 +620,35 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 input vertices: 1 Map 20 - Statistics: Num rows: 46339418820 Data size: 25753225754752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5148471846 Data size: 2336125623824 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col7 + outputColumnNames: _col2, _col3, _col4, _col5, _col8 input vertices: 1 Map 21 Statistics: Num rows: 5148471846 Data size: 2809871462440 Basic stats: COMPLETE Column 
stats: COMPLETE Group By Operator aggregations: sum(_col2), sum(_col4), sum(_col3), sum(_col5) - keys: _col7 (type: string) + keys: _col8 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 54885 Data size: 30076980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 461034 Data size: 252646632 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 54885 Data size: 30076980 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 461034 Data size: 252646632 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(17,2)), _col4 (type: decimal(17,2)) Reducer 3 Execution mode: vectorized, llap @@ -658,16 +658,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 98 Data size: 53704 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 879 Data size: 481692 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'store channel' (type: string), concat('store', _col0) (type: string), _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), (_col3 - _col4) (type: decimal(18,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 98 Data size: 60466 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 879 Data size: 542343 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: _col0 (type: string), _col1 (type: string) null sort order: zz - Statistics: Num rows: 5202 Data size: 3219822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 46812 Data size: 28974702 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 
Group By Operator aggregations: sum(_col2), sum(_col3), sum(_col4) @@ -676,13 +676,13 @@ STAGE PLANS: minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) Reducer 5 Execution mode: vectorized, llap @@ -692,17 +692,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col3, _col4, _col5 - Statistics: Num rows: 7803 Data size: 4892481 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE pruneGroupingSetId: true Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 7803 Data size: 4830057 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 43464942 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 7803 Data size: 4830057 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
70218 Data size: 43464942 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(28,2)) Reducer 6 Execution mode: vectorized, llap @@ -710,7 +710,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(28,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 7803 Data size: 4830057 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 70218 Data size: 43464942 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 61900 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out index 7c68da53ee81..5091dfb25a49 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query51.q.out @@ -42,13 +42,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8110898127 Data size: 1427518070352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16221796254 Data size: 2855036140704 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: az sort order: ++ Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8110898127 Data size: 1427518070352 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 16221796254 Data size: 2855036140704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -78,13 +78,13 @@ STAGE PLANS: 
minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2122773538 Data size: 373608142688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245547076 Data size: 747216285376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: az sort order: ++ Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 2122773538 Data size: 373608142688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245547076 Data size: 747216285376 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -157,7 +157,7 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 @@ -180,17 +180,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), sum_window_0 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: 
bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 4055449063 Data size: 713759035088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)) Reducer 3 Execution mode: llap @@ -202,13 +202,13 @@ STAGE PLANS: 0 _col0 (type: bigint), _col1 (type: date) 1 _col0 (type: bigint), _col1 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 47389106998950 Data size: 16680965663630400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint), CASE WHEN (_col4 is not null) THEN (_col4) ELSE (_col1) END (type: date) null sort order: az sort order: ++ Map-reduce partition columns: CASE WHEN (_col3 is not null) THEN (_col3) ELSE (_col0) END (type: bigint) - Statistics: Num rows: 47389106998950 Data size: 16680965663630400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(27,2)), _col3 (type: bigint), _col4 (type: date), _col5 (type: decimal(27,2)) Reducer 4 Execution mode: vectorized, llap @@ -216,7 +216,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), VALUE._col1 (type: date), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: bigint), VALUE._col4 (type: date), VALUE._col5 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 47389106998950 Data size: 16680965663630400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -242,25 +242,25 @@ STAGE PLANS: name: max window function: GenericUDAFMaxEvaluator window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 47389106998950 Data size: 16680965663630400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11706390111 Data size: 4120649319072 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (max_window_0 > max_window_1) (type: boolean) - Statistics: Num rows: 15796368999650 Data size: 5560321887876800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1373549773024 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ keys: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date) null sort order: zz - Statistics: Num rows: 15796368999650 Data size: 5560321887876800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1373549773024 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Select Operator expressions: if(_col3 is not null, _col3, _col0) (type: bigint), if(_col4 is not null, _col4, _col1) (type: date), _col5 (type: decimal(27,2)), _col2 (type: decimal(27,2)), max_window_0 (type: decimal(27,2)), max_window_1 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 15796368999650 Data size: 8087740927820800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ - Statistics: Num rows: 15796368999650 Data size: 8087740927820800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic 
stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)), _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(27,2)) Reducer 5 Execution mode: vectorized, llap @@ -268,7 +268,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: date), VALUE._col0 (type: decimal(27,2)), VALUE._col1 (type: decimal(27,2)), VALUE._col2 (type: decimal(27,2)), VALUE._col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 15796368999650 Data size: 8087740927820800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3902130037 Data size: 1997890578944 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 51200 Basic stats: COMPLETE Column stats: COMPLETE @@ -287,7 +287,7 @@ STAGE PLANS: keys: KEY._col0 (type: bigint), KEY._col1 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), _col2 (type: decimal(17,2)) outputColumnNames: _col0, _col1, _col2 @@ -310,17 +310,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumHiveDecimal window frame: ROWS PRECEDING(MAX)~CURRENT - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col1 (type: date), sum_window_0 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 
5739065904 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: date) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 1061386769 Data size: 186804071344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32608329 Data size: 5739065904 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(27,2)) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out index 0091d3885c34..31dc5e2d3eca 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] 
in task 'Reducer 6' is a cross product STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8,15 +9,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) - Map 6 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 9 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 10 <- Map 9 (SIMPLE_EDGE) + Map 1 <- Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Map 10 <- Map 13 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 13 <- Reducer 6 (BROADCAST_EDGE) + Map 15 <- Map 13 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) + Map 3 <- Reducer 14 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE) + Reducer 16 <- Map 15 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Reducer 7 (BROADCAST_EDGE) + Reducer 6 <- Map 4 (CUSTOM_SIMPLE_EDGE), Map 8 (BROADCAST_EDGE) + Reducer 7 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -46,8 +53,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 5 - Statistics: Num rows: 43005109025 Data size: 5148566336008 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 8 + Statistics: Num rows: 3532295 Data size: 28258472 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner 
Join 0 to 1 @@ -56,25 +63,197 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 11 - Statistics: Num rows: 43005109025 Data size: 9105036366308 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 3532295 Data size: 353229612 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.92992544 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 8803686108 Data size: 1866381454896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 495048 Data size: 104950176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8803686108 Data size: 1866381454896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 495048 Data size: 104950176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 11 + Map 10 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + 
keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 13 + Statistics: Num rows: 6777167 Data size: 54217448 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 17 + Statistics: Num rows: 6777167 Data size: 677716812 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.9634768 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 742572 Data size: 157425264 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 742572 Data size: 157425264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: date), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2 + input vertices: + 0 Reducer 6 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + 
Select Operator + expressions: _col2 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.8333333 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 15 + Map Operator Tree: + TableScan + alias: web_sales + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: 
COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col4 + input vertices: + 1 Map 3 + Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col4 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 13 + Statistics: Num rows: 1773711 Data size: 14189800 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col6 + input vertices: + 1 Map 17 + Statistics: Num rows: 1773711 Data size: 177371212 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col6 (type: string) + minReductionHashAggr: 0.86044854 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 17 Map Operator Tree: TableScan alias: item @@ -110,10 +289,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_date is not null or ((d_date = DATE'1998-02-19') and d_week_seq is not null) or (d_date = DATE'1998-02-19')) (type: boolean) + filterExpr: (d_date is not null and ((d_date BETWEEN DynamicValue(RS_36_date_dim_d_date_min) AND DynamicValue(RS_36_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_36_date_dim_d_date_bloom_filter))) or (d_date 
BETWEEN DynamicValue(RS_82_date_dim_d_date_min) AND DynamicValue(RS_82_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_82_date_dim_d_date_bloom_filter))))) (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: d_date is not null (type: boolean) + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_36_date_dim_d_date_min) AND DynamicValue(RS_36_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_36_date_dim_d_date_bloom_filter))) (type: boolean) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_date_sk (type: bigint), d_date (type: date) @@ -142,6 +321,13 @@ STAGE PLANS: Partition key expr: cs_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE Target Vertex: Map 1 + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_82_date_dim_d_date_min) AND DynamicValue(RS_82_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_82_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -164,7 +350,7 @@ STAGE PLANS: Target Input: store_sales Partition key expr: ss_sold_date_sk Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 6 + Target Vertex: Map 10 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -187,28 +373,23 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 67850 Data 
size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 9 - Filter Operator - predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36524 Data size: 2191440 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 146096 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 36524 Data size: 146096 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 15 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: ((d_date = DATE'1998-02-19') or ((d_date = DATE'1998-02-19') and d_week_seq is not null)) (type: boolean) + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_date = DATE'1998-02-19') (type: boolean) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -217,13 +398,30 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Filter Operator + predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 8 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date = DATE'1998-02-19') and d_week_seq is not null)) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -236,159 +434,139 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1 + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col2 input vertices: - 1 Reducer 4 - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 Reducer 5 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: date) outputColumnNames: _col0 - input vertices: - 1 Map 3 - Statistics: Num rows: 236172 Data size: 13225632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) 
- minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.8333333 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Filter Operator + predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) 
+ outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 3 - Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 82510879939 Data size: 9683309686440 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 82510879939 Data size: 17274310640828 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16702424472 Data size: 3540913988064 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null 
sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16702424472 Data size: 3540913988064 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + Reducer 11 Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: web_sales - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) + Statistics: Num rows: 3055 Data size: 989820 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 input vertices: - 1 Map 3 - Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 
- keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 21594638446 Data size: 2591054005984 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 21594638446 Data size: 4577760743016 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4426224168 Data size: 938359523616 Basic stats: COMPLETE Column stats: COMPLETE + 1 Reducer 16 + Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + Top N Key Operator + sort order: ++ + keys: _col0 (type: string), _col3 (type: decimal(17,2)) + null sort order: zz + Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE + top n: 100 + Select Operator + expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key 
expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4426224168 Data size: 938359523616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) + null sort order: zz + sort order: ++ + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) + Reducer 12 Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 10 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 14 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num 
rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 16 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -424,7 +602,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) - Reducer 4 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -437,78 +615,75 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1 + input vertices: + 1 Reducer 7 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) + aggregations: count(VALUE._col0) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, 
_col1, _col3 - input vertices: - 0 Reducer 2 - Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) - Statistics: Num rows: 3055 Data size: 989820 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: sq_count_check(_col0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 + 0 + 1 + outputColumnNames: _col1 input vertices: - 1 Reducer 10 - Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) - Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE - Top N Key Operator - sort order: ++ - keys: _col0 (type: string), _col3 (type: decimal(17,2)) - null sort order: zz - Statistics: Num rows: 1 Data size: 660 Basic stats: COMPLETE Column stats: COMPLETE - top n: 100 - Select Operator - expressions: _col0 (type: string), _col3 (type: decimal(17,2)), (((_col3 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col1 (type: decimal(17,2)), (((_col1 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), _col5 (type: decimal(17,2)), (((_col5 / ((_col3 + _col1) + _col5)) / 3) * 100) (type: decimal(38,17)), (((_col3 + _col1) + _col5) / 3) (type: 
decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: decimal(17,2)) - null sort order: zz - sort order: ++ - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 8 + 1 Map 8 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + expressions: VALUE._col0 (type: int) + outputColumnNames: 
_col0 + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) + Reducer 9 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query80.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query80.q.out index 86ce0891d9b0..b50f09d6e5a1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query80.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query80.q.out @@ -7,36 +7,36 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 16 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) - Map 10 <- Reducer 15 (BROADCAST_EDGE), Reducer 19 (BROADCAST_EDGE) - Map 13 <- Reducer 15 (BROADCAST_EDGE) - Map 23 <- Reducer 17 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE) - Map 26 <- Reducer 17 (BROADCAST_EDGE) - Map 7 <- Reducer 16 (BROADCAST_EDGE) - Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Map 13 (CUSTOM_SIMPLE_EDGE), Map 14 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 22 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Union 4 (CONTAINS) - Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE) - Reducer 16 <- Map 14 (CUSTOM_SIMPLE_EDGE) - Reducer 17 <- Map 14 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 21 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 13 <- Reducer 10 (BROADCAST_EDGE), 
Reducer 19 (BROADCAST_EDGE) + Map 16 <- Reducer 19 (BROADCAST_EDGE) + Map 23 <- Reducer 11 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE) + Map 26 <- Reducer 20 (BROADCAST_EDGE) + Map 7 <- Reducer 21 (BROADCAST_EDGE) + Reducer 10 <- Map 8 (CUSTOM_SIMPLE_EDGE) + Reducer 11 <- Map 8 (CUSTOM_SIMPLE_EDGE) + Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE), Map 16 (CUSTOM_SIMPLE_EDGE), Map 17 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 22 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Reducer 15 <- Reducer 14 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 19 <- Map 18 (CUSTOM_SIMPLE_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 14 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 7 (CUSTOM_SIMPLE_EDGE), Map 8 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 12 (BROADCAST_EDGE), Map 17 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 7 (CUSTOM_SIMPLE_EDGE), Map 8 (BROADCAST_EDGE) Reducer 20 <- Map 18 (CUSTOM_SIMPLE_EDGE) Reducer 21 <- Map 18 (CUSTOM_SIMPLE_EDGE) - Reducer 24 <- Map 14 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 23 (CUSTOM_SIMPLE_EDGE), Map 26 (CUSTOM_SIMPLE_EDGE), Map 27 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) + Reducer 24 <- Map 17 (BROADCAST_EDGE), Map 18 (BROADCAST_EDGE), Map 23 (CUSTOM_SIMPLE_EDGE), Map 26 (CUSTOM_SIMPLE_EDGE), Map 27 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) Reducer 25 <- Reducer 24 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 5 <- Union 4 (SIMPLE_EDGE) Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: store_sales - filterExpr: (ss_store_sk is not null and ss_promo_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and ss_promo_sk BETWEEN DynamicValue(RS_23_promotion_p_promo_sk_min) AND DynamicValue(RS_23_promotion_p_promo_sk_max) and in_bloom_filter(ss_item_sk, 
DynamicValue(RS_20_item_i_item_sk_bloom_filter)) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_23_promotion_p_promo_sk_bloom_filter))) (type: boolean) + filterExpr: (ss_store_sk is not null and ss_promo_sk is not null and ss_item_sk BETWEEN DynamicValue(RS_23_item_i_item_sk_min) AND DynamicValue(RS_23_item_i_item_sk_max) and ss_promo_sk BETWEEN DynamicValue(RS_26_promotion_p_promo_sk_min) AND DynamicValue(RS_26_promotion_p_promo_sk_max) and in_bloom_filter(ss_item_sk, DynamicValue(RS_23_item_i_item_sk_bloom_filter)) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_26_promotion_p_promo_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 82510879939 Data size: 21315868812296 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ss_store_sk is not null and ss_promo_sk is not null and ss_promo_sk BETWEEN DynamicValue(RS_23_promotion_p_promo_sk_min) AND DynamicValue(RS_23_promotion_p_promo_sk_max) and ss_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_23_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(ss_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (ss_store_sk is not null and ss_promo_sk is not null and ss_promo_sk BETWEEN DynamicValue(RS_26_promotion_p_promo_sk_min) AND DynamicValue(RS_26_promotion_p_promo_sk_max) and ss_item_sk BETWEEN DynamicValue(RS_23_item_i_item_sk_min) AND DynamicValue(RS_23_item_i_item_sk_max) and in_bloom_filter(ss_promo_sk, DynamicValue(RS_26_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(ss_item_sk, DynamicValue(RS_23_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 78675502838 Data size: 20325037116048 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_item_sk (type: bigint), ss_store_sk (type: bigint), ss_promo_sk (type: bigint), ss_ticket_number (type: bigint), ss_ext_sales_price (type: decimal(7,2)), 
ss_net_profit (type: decimal(7,2)), ss_sold_date_sk (type: bigint) @@ -51,14 +51,32 @@ STAGE PLANS: value expressions: _col1 (type: bigint), _col2 (type: bigint), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)), _col6 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 10 + Map 12 + Map Operator Tree: + TableScan + alias: store + Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: s_store_sk (type: bigint), s_store_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 Map Operator Tree: TableScan alias: catalog_sales - filterExpr: (cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_57_item_i_item_sk_min) AND DynamicValue(RS_57_item_i_item_sk_max) and cs_promo_sk BETWEEN DynamicValue(RS_60_promotion_p_promo_sk_min) AND DynamicValue(RS_60_promotion_p_promo_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_57_item_i_item_sk_bloom_filter)) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_60_promotion_p_promo_sk_bloom_filter))) (type: boolean) + filterExpr: (cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_item_sk BETWEEN DynamicValue(RS_60_item_i_item_sk_min) AND DynamicValue(RS_60_item_i_item_sk_max) and cs_promo_sk BETWEEN DynamicValue(RS_63_promotion_p_promo_sk_min) AND DynamicValue(RS_63_promotion_p_promo_sk_max) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_item_i_item_sk_bloom_filter)) and in_bloom_filter(cs_promo_sk, 
DynamicValue(RS_63_promotion_p_promo_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 43005109025 Data size: 11339575410520 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_promo_sk BETWEEN DynamicValue(RS_60_promotion_p_promo_sk_min) AND DynamicValue(RS_60_promotion_p_promo_sk_max) and cs_item_sk BETWEEN DynamicValue(RS_57_item_i_item_sk_min) AND DynamicValue(RS_57_item_i_item_sk_max) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_60_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(cs_item_sk, DynamicValue(RS_57_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (cs_catalog_page_sk is not null and cs_promo_sk is not null and cs_promo_sk BETWEEN DynamicValue(RS_63_promotion_p_promo_sk_min) AND DynamicValue(RS_63_promotion_p_promo_sk_max) and cs_item_sk BETWEEN DynamicValue(RS_60_item_i_item_sk_min) AND DynamicValue(RS_60_item_i_item_sk_max) and in_bloom_filter(cs_promo_sk, DynamicValue(RS_63_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(cs_item_sk, DynamicValue(RS_60_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 42789551679 Data size: 11282737308320 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cs_catalog_page_sk (type: bigint), cs_item_sk (type: bigint), cs_promo_sk (type: bigint), cs_order_number (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_net_profit (type: decimal(7,2)), cs_sold_date_sk (type: bigint) @@ -73,14 +91,14 @@ STAGE PLANS: value expressions: _col0 (type: bigint), _col2 (type: bigint), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)), _col6 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 13 + Map 16 Map Operator Tree: TableScan alias: catalog_returns - filterExpr: (cr_item_sk BETWEEN DynamicValue(RS_57_item_i_item_sk_min) AND DynamicValue(RS_57_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, 
DynamicValue(RS_57_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (cr_item_sk BETWEEN DynamicValue(RS_60_item_i_item_sk_min) AND DynamicValue(RS_60_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_60_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 4320980099 Data size: 1017653227728 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (cr_item_sk BETWEEN DynamicValue(RS_57_item_i_item_sk_min) AND DynamicValue(RS_57_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_57_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (cr_item_sk BETWEEN DynamicValue(RS_60_item_i_item_sk_min) AND DynamicValue(RS_60_item_i_item_sk_max) and in_bloom_filter(cr_item_sk, DynamicValue(RS_60_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 4320980099 Data size: 1017653227728 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cr_item_sk (type: bigint), cr_order_number (type: bigint), cr_return_amount (type: decimal(7,2)), cr_net_loss (type: decimal(7,2)) @@ -95,107 +113,110 @@ STAGE PLANS: value expressions: _col2 (type: decimal(7,2)), _col3 (type: decimal(7,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 14 + Map 17 Map Operator Tree: TableScan - alias: item - filterExpr: (i_current_price > 50) (type: boolean) - Statistics: Num rows: 462000 Data size: 55309408 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (i_current_price > 50) (type: boolean) - Statistics: Num rows: 231185 Data size: 27676904 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' 
(type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: i_item_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cs_sold_date_sk (bigint) + Target Input: catalog_sales + Partition key expr: cs_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 13 Reduce Output Operator key expressions: _col0 (type: 
bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ss_sold_date_sk (bigint) + Target Input: store_sales + Partition key expr: ss_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ws_sold_date_sk (bigint) + Target Input: web_sales + Partition key expr: ws_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 23 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 18 Map Operator Tree: TableScan - alias: promotion - filterExpr: (p_channel_tv = 'N') (type: boolean) - Statistics: Num rows: 2300 Data size: 213900 Basic stats: COMPLETE Column stats: COMPLETE + alias: item + filterExpr: (i_current_price > 50) (type: boolean) + Statistics: Num rows: 462000 Data size: 55309408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (p_channel_tv = 'N') (type: boolean) - Statistics: Num rows: 1150 Data size: 106950 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (i_current_price > 50) (type: boolean) + Statistics: Num rows: 231185 Data size: 27676904 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_promo_sk (type: bigint) + expressions: i_item_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 
Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) minReductionHashAggr: 0.99 @@ -212,11 +233,11 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) minReductionHashAggr: 0.99 @@ -233,11 +254,11 @@ STAGE PLANS: null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 231185 Data size: 1849480 Basic stats: COMPLETE 
Column stats: COMPLETE Group By Operator aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) minReductionHashAggr: 0.99 @@ -273,10 +294,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_sales - filterExpr: (ws_web_site_sk is not null and ws_promo_sk is not null and ws_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and ws_promo_sk BETWEEN DynamicValue(RS_98_promotion_p_promo_sk_min) AND DynamicValue(RS_98_promotion_p_promo_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter)) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_98_promotion_p_promo_sk_bloom_filter))) (type: boolean) + filterExpr: (ws_web_site_sk is not null and ws_promo_sk is not null and ws_item_sk BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and ws_promo_sk BETWEEN DynamicValue(RS_101_promotion_p_promo_sk_min) AND DynamicValue(RS_101_promotion_p_promo_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter)) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_101_promotion_p_promo_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 21594638446 Data size: 5700638697608 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ws_web_site_sk is not null and ws_promo_sk is not null and ws_promo_sk BETWEEN DynamicValue(RS_98_promotion_p_promo_sk_min) AND DynamicValue(RS_98_promotion_p_promo_sk_max) and ws_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_98_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(ws_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (ws_web_site_sk is not null and ws_promo_sk is not null and ws_promo_sk BETWEEN DynamicValue(RS_101_promotion_p_promo_sk_min) AND DynamicValue(RS_101_promotion_p_promo_sk_max) and ws_item_sk 
BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and in_bloom_filter(ws_promo_sk, DynamicValue(RS_101_promotion_p_promo_sk_bloom_filter)) and in_bloom_filter(ws_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 21589233207 Data size: 5699211801048 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ws_item_sk (type: bigint), ws_web_site_sk (type: bigint), ws_promo_sk (type: bigint), ws_order_number (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_net_profit (type: decimal(7,2)), ws_sold_date_sk (type: bigint) @@ -295,10 +316,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_returns - filterExpr: (wr_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and in_bloom_filter(wr_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (wr_item_sk BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and in_bloom_filter(wr_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 2160007345 Data size: 496628694560 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (wr_item_sk BETWEEN DynamicValue(RS_95_item_i_item_sk_min) AND DynamicValue(RS_95_item_i_item_sk_max) and in_bloom_filter(wr_item_sk, DynamicValue(RS_95_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (wr_item_sk BETWEEN DynamicValue(RS_98_item_i_item_sk_min) AND DynamicValue(RS_98_item_i_item_sk_max) and in_bloom_filter(wr_item_sk, DynamicValue(RS_98_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 2160007345 Data size: 496628694560 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: wr_item_sk (type: bigint), wr_order_number (type: bigint), wr_return_amt (type: decimal(7,2)), wr_net_loss (type: decimal(7,2)) @@ -335,10 +356,10 @@ STAGE PLANS: Map Operator 
Tree: TableScan alias: store_returns - filterExpr: (sr_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(sr_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: (sr_item_sk BETWEEN DynamicValue(RS_23_item_i_item_sk_min) AND DynamicValue(RS_23_item_i_item_sk_max) and in_bloom_filter(sr_item_sk, DynamicValue(RS_23_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 8634166995 Data size: 2004678961248 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (sr_item_sk BETWEEN DynamicValue(RS_20_item_i_item_sk_min) AND DynamicValue(RS_20_item_i_item_sk_max) and in_bloom_filter(sr_item_sk, DynamicValue(RS_20_item_i_item_sk_bloom_filter))) (type: boolean) + predicate: (sr_item_sk BETWEEN DynamicValue(RS_23_item_i_item_sk_min) AND DynamicValue(RS_23_item_i_item_sk_max) and in_bloom_filter(sr_item_sk, DynamicValue(RS_23_item_i_item_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 8634166995 Data size: 2004678961248 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sr_item_sk (type: bigint), sr_ticket_number (type: bigint), sr_return_amt (type: decimal(7,2)), sr_net_loss (type: decimal(7,2)) @@ -356,103 +377,108 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: promotion + filterExpr: (p_channel_tv = 'N') (type: boolean) + Statistics: Num rows: 2300 Data size: 213900 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-08-04 00:00:00' AND TIMESTAMP'1998-09-03 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: 
(p_channel_tv = 'N') (type: boolean) + Statistics: Num rows: 1150 Data size: 106950 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint) + expressions: p_promo_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ss_sold_date_sk (bigint) - Target Input: store_sales - Partition key expr: ss_sold_date_sk - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reduce Output Operator key expressions: _col0 
(type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ws_sold_date_sk (bigint) - Target Input: web_sales - Partition key expr: ws_sold_date_sk - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 23 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1150 Data 
size: 9200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: cs_sold_date_sk (bigint) - Target Input: catalog_sales - Partition key expr: cs_sold_date_sk - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 10 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 - Map Operator Tree: - TableScan - alias: store - Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: s_store_sk (type: bigint), s_store_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1704 Data size: 184032 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Reducer 10 Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reducer 11 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Map Join Operator @@ -463,34 +489,34 @@ STAGE PLANS: 1 KEY.reducesinkkey0 (type: bigint), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col9, _col10 input vertices: - 1 Map 13 + 1 Map 16 Statistics: Num rows: 68128960197 Data size: 26694756517832 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col6 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col9, _col10 input vertices: - 1 Map 14 - Statistics: Num rows: 34091760570 Data size: 10084166612312 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 7569366263 Data size: 1863498498512 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, 
_col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col0, _col2, _col4, _col5, _col9, _col10 input vertices: 1 Map 18 - Statistics: Num rows: 17045880285 Data size: 4078164892288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3787714088 Data size: 895347046408 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0, _col4, _col5, _col9, _col10 input vertices: @@ -524,7 +550,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 107889741 Data size: 47039927076 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(17,2)), _col3 (type: decimal(18,2)) - Reducer 12 + Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -558,43 +584,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 70218 Data size: 44026686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: decimal(27,2)), _col4 (type: decimal(27,2)), _col5 (type: decimal(28,2)) - Reducer 15 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: 
bigint), _col1 (type: bigint), _col2 (type: binary) - Reducer 16 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Reducer 17 + Reducer 19 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -612,19 +602,6 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Reducer 19 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: @@ -643,27 +620,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col6 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, 
_col6, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col9, _col10 input vertices: - 1 Map 14 - Statistics: Num rows: 62864387256 Data size: 22152162793776 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 13957729495 Data size: 3016221281800 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col4, _col5, _col9, _col10 input vertices: 1 Map 18 - Statistics: Num rows: 31432193628 Data size: 7113224649584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6984453758 Data size: 1230973268960 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col1, _col4, _col5, _col9, _col10 input vertices: @@ -677,7 +654,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col4, _col5, _col9, _col10, _col15 input vertices: - 1 Map 9 + 1 Map 12 Statistics: Num rows: 3492226879 Data size: 715790771852 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col15 (type: string), _col4 (type: decimal(7,2)), if(_col9 is not null, _col9, 0) (type: decimal(7,2)), (_col5 - if(_col10 is not null, _col10, 0)) (type: decimal(8,2)) @@ -710,6 +687,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reducer 21 Execution mode: vectorized, llap Reduce Operator Tree: @@ 
-723,6 +705,11 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Reducer 24 Execution mode: vectorized, llap Reduce Operator Tree: @@ -741,27 +728,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col6 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col9, _col10 input vertices: - 1 Map 14 - Statistics: Num rows: 16830120307 Data size: 6191043150168 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 17 + Statistics: Num rows: 3736778117 Data size: 926375207640 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col9, _col10 + outputColumnNames: _col1, _col2, _col4, _col5, _col9, _col10 input vertices: 1 Map 18 - Statistics: Num rows: 8415060154 Data size: 2151835885288 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1869885355 Data size: 448426719824 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col6 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col1, _col4, _col5, _col9, _col10 input vertices: @@ -900,6 +887,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: 
vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Union 4 Vertex: Union 4 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out index 39b248473790..c25dea04819d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out @@ -7,17 +7,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Map 12 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) - Map 3 <- Map 8 (BROADCAST_EDGE) - Map 5 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) - Map 8 <- Map 11 (BROADCAST_EDGE) - Reducer 10 <- Map 8 (SIMPLE_EDGE) - Reducer 13 <- Map 12 (SIMPLE_EDGE) + Map 1 <- Map 15 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) + Map 13 <- Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Map 15 <- Reducer 10 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) + Map 3 <- Map 9 (BROADCAST_EDGE) + Map 6 <- Map 15 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 9 (BROADCAST_EDGE) + Map 9 <- Map 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (SIMPLE_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 13 
(BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -37,7 +39,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Map 8 + 1 Map 15 Statistics: Num rows: 4320980099 Data size: 293480294712 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -48,7 +50,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 4320980099 Data size: 51505409168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1183036 Data size: 9464292 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -57,21 +59,21 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 4320980099 Data size: 449035578276 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 1183036 Data size: 118303604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.7907722 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 434404620 Data size: 46915698960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 434404620 Data size: 46915698960 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) @@ -83,26 +85,32 @@ STAGE PLANS: Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) - minReductionHashAggr: 0.690705 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 12 + Map 13 Map Operator Tree: TableScan alias: web_returns @@ -119,7 +127,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 9 + 1 Map 15 Statistics: Num rows: 2062802370 Data size: 140076140668 Basic 
stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -129,8 +137,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 8 - Statistics: Num rows: 2062802370 Data size: 24559207948 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 564772 Data size: 4518180 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -139,30 +147,121 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 2062802370 Data size: 214337025988 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 564772 Data size: 56477204 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.5617275 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 207425112 Data size: 22401912096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 207425112 Data size: 22401912096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 15 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (d_date is not null and ((d_date BETWEEN DynamicValue(RS_98_date_dim_d_date_min) AND DynamicValue(RS_98_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_98_date_dim_d_date_bloom_filter))) or (d_date BETWEEN DynamicValue(RS_26_date_dim_d_date_min) AND DynamicValue(RS_26_date_dim_d_date_max) and 
in_bloom_filter(d_date, DynamicValue(RS_26_date_dim_d_date_bloom_filter))))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_98_date_dim_d_date_min) AND DynamicValue(RS_98_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_98_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: wr_returned_date_sk (bigint) + Target Input: web_returns + Partition key expr: wr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 13 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + 
expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: sr_returned_date_sk (bigint) + Target Input: store_returns + Partition key expr: sr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 6 + Filter Operator + predicate: (d_date is not null and d_date BETWEEN DynamicValue(RS_26_date_dim_d_date_min) AND DynamicValue(RS_26_date_dim_d_date_max) and in_bloom_filter(d_date, DynamicValue(RS_26_date_dim_d_date_bloom_filter))) (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cr_returned_date_sk (bigint) + Target Input: catalog_returns + Partition key expr: 
cr_returned_date_sk + Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Map 3 Map Operator Tree: TableScan alias: date_dim filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:2.7378882667798324E-4 Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -179,23 +278,38 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 8 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + 
minReductionHashAggr: 0.95 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: item @@ -227,7 +341,7 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 6 Map Operator Tree: TableScan alias: store_returns @@ -244,7 +358,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col4 input vertices: - 1 Reducer 10 + 1 Map 15 Statistics: Num rows: 8332595709 Data size: 566008907392 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -254,8 +368,8 @@ STAGE PLANS: 1 _col0 (type: date) outputColumnNames: _col0, _col1 input vertices: - 1 Map 8 - Statistics: Num rows: 8332595709 Data size: 99383547688 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 9 + Statistics: Num rows: 2281371 Data size: 18250972 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -264,29 +378,29 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6 input vertices: - 1 Map 4 - Statistics: Num rows: 8332595709 Data size: 865982352916 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 5 + Statistics: Num rows: 2281371 Data size: 228137104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col6 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.8915021 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 837373692 Data size: 90436358736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 837373692 Data size: 90436358736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 8 + Map 9 Map Operator Tree: TableScan alias: date_dim - filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) or d_date is not null) (type: boolean) + filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null)) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) @@ -303,137 +417,104 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 11 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + 1 Reducer 12 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 11 + Statistics: Num rows: 19 Data size: 1064 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.4 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: date) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: date) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 1120 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.95 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) Filter Operator predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: d_week_seq (type: int) outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 146100 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) - minReductionHashAggr: 0.690705 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: d_date is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint), d_date (type: date) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: wr_returned_date_sk (bigint) - Target Input: web_returns - Partition key expr: wr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 12 - Reduce Output 
Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: cr_returned_date_sk (bigint) - Target Input: catalog_returns - Partition key expr: cr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: sr_returned_date_sk (bigint) - Target Input: store_returns - Partition key expr: sr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP 
IO: may be used (ACID table) Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Reducer 13 + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -469,7 +550,20 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Reducer 6 + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output 
Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: binary) + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -496,7 +590,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col5, _col6 input vertices: - 1 Reducer 13 + 1 Reducer 14 Statistics: Num rows: 247524 Data size: 32673168 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ @@ -514,7 +608,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -531,19 +625,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query87.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query87.q.out index 334084e78ceb..ad5904db6f25 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query87.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query87.q.out @@ -252,13 +252,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4187240873 Data size: 988188846028 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 4187240873 Data size: 988188846028 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Reducer 11 Execution mode: vectorized, llap Reduce Operator Tree: @@ -266,38 +266,38 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2093620436 Data size: 494094422896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2093620436 Data size: 494094422896 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 1976377692056 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 523405109 Data size: 127710846596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 2043373546024 Basic stats: COMPLETE 
Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), 1L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 523405109 Data size: 131898087468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8374481746 Data size: 2110369399992 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1513369688 Data size: 381369161376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3), sum(_col4) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 756684844 Data size: 190684580688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 756684844 Data size: 190684580688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 14 Execution mode: vectorized, llap @@ -318,13 +318,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2122508751 Data size: 500912065236 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 2122508751 Data size: 500912065236 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: @@ -332,38 +332,38 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061254375 Data size: 250456032500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1061254375 Data size: 250456032500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1001824130708 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 265313593 Data size: 64736516692 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1035784270732 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), 1L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 265313593 
Data size: 66859025436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4245017503 Data size: 1069744410756 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 281077860 Data size: 70831620720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3), sum(_col4) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 140538930 Data size: 35415810360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 140538930 Data size: 35415810360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 2 Execution mode: vectorized, llap @@ -384,13 +384,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7919716636 Data size: 1869053126096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15839433273 Data size: 3738106252428 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null 
sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 7919716636 Data size: 1869053126096 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15839433273 Data size: 3738106252428 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Operator Tree: @@ -398,38 +398,38 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3959858318 Data size: 934526563048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2238783746216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: char(20)), _col0 (type: char(30)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3959858318 Data size: 934526563048 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2238783746216 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: char(30)), _col0 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 989964579 Data size: 241551357276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2314674720664 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), 2L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 989964579 Data size: 249471073908 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2390565695112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 
(type: date), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1513369688 Data size: 381369161376 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3), sum(_col4) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 756684844 Data size: 190684580688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 756684844 Data size: 190684580688 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17860853552 Data size: 4500935095104 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -439,41 +439,41 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 378342422 Data size: 95342290344 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9486371806 Data size: 2390565695112 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col3 > 0L) and ((_col3 * 2L) = _col4)) (type: boolean) - Statistics: Num rows: 63057070 Data size: 15890381640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1581061967 Data size: 398427615684 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63057070 Data size: 15890381640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1581061967 Data size: 398427615684 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 15764267 Data size: 3846481148 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1581061967 Data size: 385779119948 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), 2L (type: bigint), _col3 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 15764267 Data size: 3972595284 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1581061967 Data size: 398427615684 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date), _col4 (type: bigint), (_col3 * _col4) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 281077860 Data size: 70831620720 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col3), sum(_col4) keys: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 140538930 Data size: 35415810360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: char(30)), _col1 (type: char(20)), _col2 (type: date) - Statistics: Num rows: 140538930 Data size: 35415810360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 7 Execution mode: vectorized, llap @@ -483,16 +483,16 @@ STAGE PLANS: keys: KEY._col0 (type: char(30)), KEY._col1 (type: char(20)), KEY._col2 (type: date) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 70269465 Data size: 17707905180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 1468172026440 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: bigint), _col4 (type: bigint) outputColumnNames: _col3, _col4 - Statistics: Num rows: 70269465 Data size: 1124311440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5826079470 Data size: 93217271520 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col3 > 0L) and ((_col3 * 2L) = _col4)) (type: boolean) - Statistics: Num rows: 11711577 Data size: 187385232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 971013245 Data size: 15536211920 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 11711577 Data size: 187385232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 971013245 Data size: 15536211920 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() minReductionHashAggr: 0.99 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query92.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query92.q.out index c3abe9358aec..1e025671aef0 100644 --- 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query92.q.out @@ -7,18 +7,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 7 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 1 (BROADCAST_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: web_sales - filterExpr: (ws_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) (type: boolean) + filterExpr: ws_ext_discount_amt is not null (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_87_container, bigKeyColName:ws_item_sk, smallTablePos:1, keyRatio:1.1253233093375219E-4 Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ws_ext_discount_amt is not null (type: boolean) @@ -27,30 +28,6 @@ STAGE PLANS: expressions: ws_item_sk (type: bigint), ws_ext_discount_amt (type: decimal(7,2)), ws_sold_date_sk (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21591933650 Data size: 2763464608128 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Map 5 - Statistics: Num rows: 21872348 Data size: 
2496761472 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: bigint) - Statistics: Num rows: 21872348 Data size: 2496761472 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(7,2)), _col2 (type: bigint) - Filter Operator - predicate: (ws_item_sk BETWEEN DynamicValue(RS_25_item_i_item_sk_min) AND DynamicValue(RS_25_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_25_item_i_item_sk_bloom_filter))) (type: boolean) - Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: ws_item_sk (type: bigint), ws_ext_discount_amt (type: decimal(7,2)), ws_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -59,73 +36,77 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1 input vertices: - 1 Map 7 - Statistics: Num rows: 2399240019 Data size: 287605865240 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1), count(_col1) - keys: _col0 (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 57694920 Data size: 7384949760 Basic stats: COMPLETE Column stats: COMPLETE + 1 Map 3 + Statistics: Num rows: 2398939507 Data size: 287569841768 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 2430095 Data size: 19440872 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint) + key 
expressions: _col4 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 57694920 Data size: 7384949760 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) + Map-reduce partition columns: _col4 (type: bigint) + Statistics: Num rows: 2430095 Data size: 19440872 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)) + Select Operator + expressions: _col4 (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 2430095 Data size: 19440760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col4), max(_col4), bloom_filter(_col4, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 3 Map Operator Tree: TableScan - alias: item - filterExpr: (i_manufact_id = 269) (type: boolean) - Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (i_manufact_id = 269) (type: boolean) - Statistics: Num rows: 468 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' (type: boolean) + Statistics: Num rows: 
8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: i_item_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) - minReductionHashAggr: 0.99 + keys: _col0 (type: bigint) + minReductionHashAggr: 0.4 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 7 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1998-03-18 00:00:00' AND TIMESTAMP'1998-06-16 00:00:00' (type: 
boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0 + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ws_sold_date_sk (bigint) + Target Input: web_sales + Partition key expr: ws_sold_date_sk + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 1 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -147,29 +128,83 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 + Target Vertex: Map 5 + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 4 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_manufact_id = 269) (type: boolean) + Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_manufact_id = 269) (type: boolean) + Statistics: Num rows: 468 Data size: 5616 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 468 Data size: 3744 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 5 + Map Operator Tree: + 
TableScan + alias: web_sales + filterExpr: (ws_item_sk BETWEEN DynamicValue(RS_30_item_i_item_sk_min) AND DynamicValue(RS_30_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_30_item_i_item_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (ws_item_sk BETWEEN DynamicValue(RS_30_item_i_item_sk_min) AND DynamicValue(RS_30_item_i_item_sk_max) and in_bloom_filter(ws_item_sk, DynamicValue(RS_30_item_i_item_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ws_item_sk (type: bigint), ws_ext_discount_amt (type: decimal(7,2)), ws_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 21594638446 Data size: 2763810784048 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 2399240019 Data size: 287605865240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col1), count(_col1) + keys: _col0 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 57694920 Data size: 7384949760 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 57694920 Data size: 7384949760 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: - Select Operator - 
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col1 (type: decimal(7,2)), VALUE._col2 (type: bigint) - outputColumnNames: _col3, _col1, _col2 + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col3 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: bigint) - Statistics: Num rows: 21872348 Data size: 2496761472 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(7,2)), _col2 (type: bigint) - Reducer 3 + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 6 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -189,25 +224,19 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col3 (type: bigint) + 0 _col4 (type: bigint) 1 _col1 (type: bigint) - outputColumnNames: _col1, _col2, _col4 + outputColumnNames: _col1, _col5 input vertices: - 0 Reducer 2 - Statistics: Num rows: 51330 Data size: 6159712 Basic stats: COMPLETE Column stats: COMPLETE + 0 Map 1 + Statistics: Num rows: 51330 Data size: 5749072 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col1 > _col4) (type: boolean) - Statistics: Num rows: 17110 Data size: 2053312 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) + predicate: (_col1 > _col5) (type: boolean) + Statistics: Num rows: 17110 Data size: 1916432 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: decimal(7,2)) outputColumnNames: _col1 - input 
vertices: - 1 Map 7 - Statistics: Num rows: 17110 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 17110 Data size: 1916432 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) minReductionHashAggr: 0.99 @@ -219,7 +248,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(17,2)) - Reducer 4 + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -234,19 +263,6 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1000000) - mode: final - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query94.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query94.q.out index 6ca2d294e5f3..acc0f2da6bfd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query94.q.out @@ -23,7 +23,7 @@ STAGE PLANS: TableScan alias: ws1 filterExpr: (ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_119_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, 
keyRatio:2.7777730824410645E-10 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_120_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:2.9924592936258674E-4 Statistics: Num rows: 21600036511 Data size: 5701632353848 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null) (type: boolean) @@ -36,27 +36,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 8 - Statistics: Num rows: 407242361 Data size: 103520524440 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2398040806 Data size: 613164879160 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col4, _col5, _col6 + outputColumnNames: _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 9 - Statistics: Num rows: 58177483 Data size: 13737647176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45246054 Data size: 10530636632 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col3, _col4, _col5, _col6 input vertices: @@ -89,22 +89,22 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: web_site + filterExpr: (web_company_name = 'pri ') (type: boolean) + Statistics: Num rows: 84 Data size: 
8064 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (web_company_name = 'pri ') (type: boolean) + Statistics: Num rows: 12 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint) + expressions: web_site_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 @@ -165,43 +165,43 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_state = 'TX') (type: boolean) - Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ca_state = 'TX') (type: boolean) - Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE 
Select Operator - expressions: ca_address_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan - alias: web_site - filterExpr: (web_company_name = 'pri ') (type: boolean) - Statistics: Num rows: 84 Data size: 8064 Basic stats: COMPLETE Column stats: COMPLETE + alias: customer_address + filterExpr: (ca_state = 'TX') (type: boolean) + Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (web_company_name = 'pri ') (type: boolean) - Statistics: Num rows: 12 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (ca_state = 'TX') (type: boolean) + Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: web_site_sk (type: bigint) + expressions: ca_address_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE 
Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 2 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out index 3a966e9f29c8..f3568baa028f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out @@ -28,7 +28,7 @@ STAGE PLANS: TableScan alias: ws1 filterExpr: (ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_210_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:2.7777730824410645E-10 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_211_container, bigKeyColName:ws_web_site_sk, smallTablePos:1, keyRatio:2.9924592936258674E-4 Statistics: Num rows: 21600036511 Data size: 5528875272680 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null) (type: boolean) @@ -41,27 +41,27 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3, _col4, _col5 + outputColumnNames: _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 8 - Statistics: Num rows: 407242361 Data size: 100305764080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2398040806 Data size: 594023731240 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col1 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3, _col4, _col5 + outputColumnNames: _col2, _col3, _col4, _col5 input vertices: 1 Map 9 - Statistics: Num rows: 58177483 Data size: 13315405840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
45246054 Data size: 10211846728 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col3, _col4, _col5 input vertices: @@ -94,22 +94,22 @@ STAGE PLANS: Map 10 Map Operator Tree: TableScan - alias: date_dim - filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + alias: web_site + filterExpr: (web_company_name = 'pri ') (type: boolean) + Statistics: Num rows: 84 Data size: 8064 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) - Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (web_company_name = 'pri ') (type: boolean) + Statistics: Num rows: 12 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: d_date_sk (type: bigint) + expressions: web_site_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 @@ -194,43 +194,43 @@ STAGE PLANS: Map 8 Map Operator Tree: TableScan - alias: customer_address - filterExpr: (ca_state = 'TX') (type: boolean) - Statistics: Num rows: 40000000 Data 
size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (ca_state = 'TX') (type: boolean) - Statistics: Num rows: 754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE + predicate: CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' (type: boolean) + Statistics: Num rows: 8116 Data size: 519424 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ca_address_sk (type: bigint) + expressions: d_date_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8116 Data size: 64928 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 9 Map Operator Tree: TableScan - alias: web_site - filterExpr: (web_company_name = 'pri ') (type: boolean) - Statistics: Num rows: 84 Data size: 8064 Basic stats: COMPLETE Column stats: COMPLETE + alias: customer_address + filterExpr: (ca_state = 'TX') (type: boolean) + Statistics: Num rows: 40000000 Data size: 3760000000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (web_company_name = 'pri ') (type: boolean) - Statistics: Num rows: 12 Data size: 1152 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (ca_state = 'TX') (type: boolean) + Statistics: Num rows: 
754717 Data size: 70943398 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: web_site_sk (type: bigint) + expressions: ca_address_sk (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 754717 Data size: 6037736 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 12 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query98.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query98.q.out index c8fc334ead4c..6034e697996d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query98.q.out @@ -17,7 +17,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_54_container, bigKeyColName:ss_item_sk, smallTablePos:1, keyRatio:0.2727272808584318 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_50_container, bigKeyColName:ss_item_sk, smallTablePos:1, keyRatio:0.030300956793193314 Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) @@ -27,26 +27,26 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: bigint) + 0 _col2 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1 input vertices: 1 Map 5 - Statistics: Num rows: 
22502967927 Data size: 15489075671302 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9167247954 Data size: 882073848240 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col1, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col1, _col5, _col6, _col7, _col8, _col9 input vertices: 1 Map 6 Statistics: Num rows: 2500158608 Data size: 1507113497776 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) - keys: _col8 (type: char(50)), _col7 (type: char(50)), _col4 (type: string), _col5 (type: varchar(200)), _col6 (type: decimal(7,2)) - minReductionHashAggr: 0.99 + keys: _col9 (type: char(50)), _col8 (type: char(50)), _col5 (type: string), _col6 (type: varchar(200)), _col7 (type: decimal(7,2)) + minReductionHashAggr: 0.6650032 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 126000 Data size: 86940000 Basic stats: COMPLETE Column stats: COMPLETE @@ -60,28 +60,6 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 - Map Operator Tree: - TableScan - alias: item - filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - 
Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 6 Map Operator Tree: TableScan alias: date_dim @@ -118,6 +96,28 @@ STAGE PLANS: Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: item + filterExpr: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 462000 Data size: 270601408 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (i_category) IN ('Books ', 'Jewelry ', 'Sports ') (type: boolean) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string), i_item_desc (type: varchar(200)), i_current_price (type: decimal(7,2)), i_class (type: char(50)), i_category (type: char(50)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 126000 Data size: 73800496 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: varchar(200)), _col3 (type: decimal(7,2)), _col4 (type: char(50)), _col5 (type: char(50)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) Reducer 2 Execution mode: vectorized, llap Reduce Operator Tree: From 
1285297cd35bd92d4c334f733fea2f032eb83454 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Sun, 28 Dec 2025 14:57:17 -0800 Subject: [PATCH 07/14] HIVE-29368: Sonar Qube feedback + one more test --- .../hadoop/hive/ql/stats/StatsUtils.java | 30 +++++++++++-------- .../hadoop/hive/ql/stats/TestStatsUtils.java | 12 ++++++++ 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 07d616602c90..fce47b227c7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -2088,19 +2088,7 @@ private static List extractNDVGroupingColumns(List colStats // compute product of distinct values of grouping columns for (ColStatistics cs : colStats) { if (cs != null) { - long ndv = cs.getCountDistint(); - - if (ndv == 0L) { - // Typically, ndv == 0 means "NDV unknown", and no safe GROUPBY adjustments are possible - // However, there is a special exception for "constant NULL" columns. They are intentionally generated - // with NDV values of 0 and numNulls == numRows, while their actual NDV is 1 - if (cs.getNumNulls() >= parentStats.getNumRows()) { - ndv = 1L; - } - } else if (cs.getNumNulls() > 0L) { - ndv = StatsUtils.safeAdd(ndv, 1L); - } - ndvValues.add(ndv); + ndvValues.add(getGroupingColumnNdv(cs, parentStats)); } else { if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) { // the column must be an aggregate column inserted by GBY. 
We @@ -2119,4 +2107,20 @@ private static List extractNDVGroupingColumns(List colStats return ndvValues; } + + private static long getGroupingColumnNdv(ColStatistics cs, Statistics parentStats) { + long ndv = cs.getCountDistint(); + + if (ndv == 0L) { + // Typically, ndv == 0 means "NDV unknown", and no safe GROUPBY adjustments are possible + // However, there is a special exception for "constant NULL" columns. They are intentionally generated + // with NDV values of 0 and numNulls == numRows, while their actual NDV is 1 + if (cs.getNumNulls() >= parentStats.getNumRows()) { + ndv = 1L; + } + } else if (cs.getNumNulls() > 0L) { + ndv = StatsUtils.safeAdd(ndv, 1L); + } + return ndv; + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java index 8a3dd1cea14e..75714bba8ff1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java @@ -321,4 +321,16 @@ void testComputeNDVGroupingColumnsPartialStats() { assertEquals(0, ndv, "Partial stats (ndv=0, numNulls= numRows, it's a "constant NULL" column, so NDV should be 1 + ColStatistics cs = createColStats("all_nulls_col", 0, 1000); + Statistics parentStats = createParentStats(1000); + List colStats = Collections.singletonList(cs); + + long ndv = StatsUtils.computeNDVGroupingColumns(colStats, parentStats, false); + + assertEquals(1, ndv, "All-null column (ndv=0, numNulls==numRows) should have NDV inflated to 1"); + } + } From 40fc7ff55099f8aa53a587979ff312b77ca438eb Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Tue, 30 Dec 2025 09:10:00 -0800 Subject: [PATCH 08/14] HIVE-29368: a typo in the comment --- .../hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java index 7b61bc460158..6e9a89a9d4b6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java @@ -44,7 +44,7 @@ public void add(ColStatistics stat) { // NDVs can only be accurately combined if full information about columns, query branches and // their relationships is available. Without that info, there is only one "truly conservative" - // value of NDV which is 0, which means that the NDV is unknown. It forces optimized + // value of NDV which is 0, which means that the NDV is unknown. It forces optimizer // to make the most conservative decisions possible, which is the exact goal of // PessimisticStatCombiner. It does inflate statistics in multiple cases, but at the same time it // also ensures than the query execution does not "blow up" due to too optimistic stats estimates From cf4fa0b5feb12ea12a610b7f32b182eb6e8cf46e Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Wed, 4 Feb 2026 15:37:55 -0800 Subject: [PATCH 09/14] HIVE-29368: trying a more intelligent NDV estimate for CASE/WHEN clauses before falling back to pessimistic combining --- .../annotation/StatsRulesProcFactory.java | 22 ++++++++ .../hadoop/hive/ql/plan/Statistics.java | 7 ++- .../hadoop/hive/ql/stats/StatsUtils.java | 2 +- .../ql/stats/estimator/StatEstimator.java | 15 ++++++ .../ql/udf/generic/GenericUDFCoalesce.java | 35 ++++++++++++ .../hive/ql/udf/generic/GenericUDFIf.java | 30 +++++++++++ .../hive/ql/udf/generic/GenericUDFWhen.java | 51 ++++++++++++++++++ .../llap/pessimistic_stat_combiner_ndv.q.out | 54 +++++++++---------- 8 files changed, 187 insertions(+), 29 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java 
index 19f83f39147f..17932ca608f3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -2030,6 +2030,18 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } + // NDV=0 means join key statistics are unavailable - fall back to joinFactor heuristic + if (allSatisfyPreCondition) { + for (int pos = 0; pos < parents.size(); pos++) { + ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); + List keyExprs = StatsUtils.getQualifedReducerKeyNames(parent.getConf().getOutputKeyColumnNames()); + if (!satisfyPrecondition(parent.getStatistics(), keyExprs)) { + allSatisfyPreCondition = false; + break; + } + } + } + if (allSatisfyPreCondition) { // statistics object that is combination of statistics from all @@ -3237,6 +3249,16 @@ static boolean satisfyPrecondition(Statistics stats) { && !stats.getColumnStatsState().equals(Statistics.State.NONE); } + static boolean satisfyPrecondition(Statistics stats, List joinKeys) { + for (String col : joinKeys) { + ColStatistics cs = stats.getColumnStatisticsFromColName(col); + if (cs != null && cs.getCountDistint() == 0L) { + return false; + } + } + return true; + } + // check if all parent statistics are available private static boolean isAllParentsContainStatistics(Operator op) { for (Operator parent : op.getParentOperators()) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 2e36c85e0919..90acc8503387 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -256,7 +256,12 @@ public void addToColumnStats(List colStats) { updatedCS = columnStats.get(key); updatedCS.setAvgColLen(Math.max(updatedCS.getAvgColLen(), cs.getAvgColLen())); 
updatedCS.setNumNulls(StatsUtils.safeAdd(updatedCS.getNumNulls(), cs.getNumNulls())); - updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint())); + if(updatedCS.getCountDistint() > 0 && cs.getCountDistint() > 0) { + updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint())); + } else { + // If one is unknown, the product is also unknown + updatedCS.setCountDistint(0); + } columnStats.put(key, updatedCS); } else { columnStats.put(key, cs); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index fce47b227c7b..1048cc2629cb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1573,7 +1573,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis csList.add(cs); } if (csList.size() == engfd.getChildren().size()) { - Optional res = se.estimate(csList); + Optional res = se.estimate(csList, engfd.getChildren()); if (res.isPresent()) { ColStatistics newStats = res.get(); colType = colType.toLowerCase(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java index 94aaa32ecfcb..16d01adfe063 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java @@ -22,6 +22,7 @@ import java.util.Optional; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; /** * Enables statistics related computation on UDFs @@ -40,4 +41,18 @@ public interface StatEstimator { * @return {@link ColStatistics} estimate for the actual UDF. */ public Optional estimate(List argStats); + + /** + * Computes the output statistics of the actual UDF with access to original expressions. 
+ * + * This method provides access to the original expression nodes, allowing estimators to make + * more accurate estimates when expressions have special properties (e.g., all constants). + * + * @param argStats the statistics for every argument of the UDF + * @param argExprs the original expression nodes for every argument of the UDF + * @return {@link ColStatistics} estimate for the actual UDF. + */ + default Optional estimate(List argStats, List argExprs) { + return estimate(argStats); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java index bbca9242ecaa..a69400ab8662 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java @@ -18,14 +18,18 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; @@ -90,6 +94,37 @@ static class CoalesceStatEstimator implements StatEstimator { @Override public Optional estimate(List argStats) { + return estimate(argStats, null); + } + + @Override + public Optional estimate(List argStats, List argExprs) { + if (argExprs != null && !argExprs.isEmpty()) { + Set distinctConstants = new 
HashSet<>(); + boolean allConstants = true; + + for (ExprNodeDesc expr : argExprs) { + if (!(expr instanceof ExprNodeConstantDesc)) { + allConstants = false; + break; + } + distinctConstants.add(((ExprNodeConstantDesc) expr).getValue()); + } + + if (allConstants && !distinctConstants.isEmpty()) { + ColStatistics result = argStats.get(0).clone(); + result.setCountDistint(distinctConstants.size()); + result.setIsEstimated(true); + for (int i = 1; i < argStats.size(); i++) { + if (argStats.get(i).getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(argStats.get(i).getAvgColLen()); + } + } + return Optional.of(result); + } + } + + // Fall back to pessimistic combining PessimisticStatCombiner combiner = new PessimisticStatCombiner(); for (int i = 0; i < argStats.size(); i++) { combiner.add(argStats.get(i)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index eaa352317267..7ade70afef53 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -29,6 +31,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; @@ -188,6 +192,32 @@ 
static class IfStatEstimator implements StatEstimator { @Override public Optional estimate(List argStats) { + return estimate(argStats, null); + } + + @Override + public Optional estimate(List argStats, List argExprs) { + // argExprs: [condition, thenValue, elseValue] + if (argExprs != null && argExprs.size() == 3) { + ExprNodeDesc thenExpr = argExprs.get(1); + ExprNodeDesc elseExpr = argExprs.get(2); + + if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) { + Set distinctConstants = new HashSet<>(); + distinctConstants.add(((ExprNodeConstantDesc) thenExpr).getValue()); + distinctConstants.add(((ExprNodeConstantDesc) elseExpr).getValue()); + + ColStatistics result = argStats.get(1).clone(); + result.setCountDistint(distinctConstants.size()); + result.setIsEstimated(true); + if (argStats.get(2).getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(argStats.get(2).getAvgColLen()); + } + return Optional.of(result); + } + } + + // Fall back to pessimistic combining PessimisticStatCombiner combiner = new PessimisticStatCombiner(); combiner.add(argStats.get(1)); combiner.add(argStats.get(2)); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java index e6d3580692d3..17b9f9848883 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java @@ -18,13 +18,17 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.HashSet; import java.util.List; import java.util.Optional; +import java.util.Set; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import 
org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; @@ -144,6 +148,53 @@ static class WhenStatEstimator implements StatEstimator { @Override public Optional estimate(List argStats) { + return estimate(argStats, null); + } + + @Override + public Optional estimate(List argStats, List argExprs) { + if (argExprs != null) { + Set distinctConstants = new HashSet<>(); + boolean allConstants = true; + + // Value expressions are at odd indices: 1, 3, 5, ... + for (int i = 1; i < argExprs.size(); i += 2) { + if (!(argExprs.get(i) instanceof ExprNodeConstantDesc)) { + allConstants = false; + break; + } + distinctConstants.add(((ExprNodeConstantDesc) argExprs.get(i)).getValue()); + } + // Check ELSE branch if present (odd number of args) + if (allConstants && argExprs.size() % 2 == 1) { + ExprNodeDesc elseExpr = argExprs.get(argExprs.size() - 1); + if (!(elseExpr instanceof ExprNodeConstantDesc)) { + allConstants = false; + } else { + distinctConstants.add(((ExprNodeConstantDesc) elseExpr).getValue()); + } + } + + if (allConstants && !distinctConstants.isEmpty()) { + ColStatistics result = argStats.get(1).clone(); + result.setCountDistint(distinctConstants.size()); + result.setIsEstimated(true); + for (int i = 3; i < argStats.size(); i += 2) { + if (argStats.get(i).getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(argStats.get(i).getAvgColLen()); + } + } + if (argStats.size() % 2 == 1) { + ColStatistics elseStat = argStats.get(argStats.size() - 1); + if (elseStat.getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(elseStat.getAvgColLen()); + } + } + return Optional.of(result); + } + } + + // Fall back to pessimistic combining PessimisticStatCombiner combiner = new PessimisticStatCombiner(); for (int i = 1; i < argStats.size(); i += 2) { 
combiner.add(argStats.get(i)); diff --git a/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out b/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out index b23255417f92..2b2c665e8f68 100644 --- a/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out +++ b/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out @@ -28,14 +28,14 @@ FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub GROUP BY x PREHOOK: type: QUERY PREHOOK: Input: default@t1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: EXPLAIN SELECT x, COUNT(*) FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub GROUP BY x POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -63,13 +63,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -81,10 +81,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250000 
Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -105,7 +105,7 @@ FROM ( GROUP BY x PREHOOK: type: QUERY PREHOOK: Input: default@t1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: EXPLAIN SELECT x, COUNT(*) FROM ( @@ -115,7 +115,7 @@ FROM ( GROUP BY x POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -143,13 +143,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -161,10 +161,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE table: input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -185,7 +185,7 @@ FROM ( GROUP BY x PREHOOK: type: QUERY PREHOOK: Input: default@t1 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: EXPLAIN SELECT x, COUNT(*) FROM ( @@ -195,7 +195,7 @@ FROM ( GROUP BY x POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -223,13 +223,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500000 Data size: 46500000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -241,10 +241,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250000 Data size: 23250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -345,7 +345,7 @@ JOIN t2 b ON 
a.k = b.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 -#### A masked pattern was here #### +PREHOOK: Output: hdfs://### HDFS PATH ### POSTHOOK: query: EXPLAIN SELECT a.k, a.total, a.sample, b.v1 FROM ( @@ -387,7 +387,7 @@ JOIN t2 b ON a.k = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 -#### A masked pattern was here #### +POSTHOOK: Output: hdfs://### HDFS PATH ### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -416,13 +416,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500000 Data size: 139500000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 60 Data size: 16740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500000 Data size: 139500000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 60 Data size: 16740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -456,13 +456,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250000 Data size: 69750000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250000 Data size: 69750000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 5580 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: string) Reducer 3 Execution mode: llap @@ 
-474,14 +474,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 8260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 8260 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250000 Data size: 103250000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 8260 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat From c18f8cdf7bc60406aa23b283ba954af2fb8e87c9 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 5 Feb 2026 11:06:35 -0800 Subject: [PATCH 10/14] HIVE-29368: refactoring constant NDV estimates as per the PR feedback --- .../hadoop/hive/ql/plan/Statistics.java | 2 +- .../hadoop/hive/ql/stats/StatsUtils.java | 2 +- .../ql/stats/estimator/StatEstimator.java | 15 -- .../ql/udf/generic/GenericUDFCoalesce.java | 56 ++--- .../hive/ql/udf/generic/GenericUDFIf.java | 48 ++-- .../hive/ql/udf/generic/GenericUDFWhen.java | 80 ++++--- .../TestGenericUDFCoalesceStatEstimator.java | 181 +++++++++++++++ .../TestGenericUDFIfStatEstimator.java | 160 +++++++++++++ .../TestGenericUDFWhenStatEstimator.java | 214 ++++++++++++++++++ 9 files changed, 644 insertions(+), 114 deletions(-) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java create mode 100644 
ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFWhenStatEstimator.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 90acc8503387..0b5f6605fc68 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -256,7 +256,7 @@ public void addToColumnStats(List colStats) { updatedCS = columnStats.get(key); updatedCS.setAvgColLen(Math.max(updatedCS.getAvgColLen(), cs.getAvgColLen())); updatedCS.setNumNulls(StatsUtils.safeAdd(updatedCS.getNumNulls(), cs.getNumNulls())); - if(updatedCS.getCountDistint() > 0 && cs.getCountDistint() > 0) { + if (updatedCS.getCountDistint() > 0 && cs.getCountDistint() > 0) { updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint())); } else { // If one is unknown, the product is also unknown diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index 1048cc2629cb..fce47b227c7b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -1573,7 +1573,7 @@ public static ColStatistics getColStatisticsFromExpression(HiveConf conf, Statis csList.add(cs); } if (csList.size() == engfd.getChildren().size()) { - Optional res = se.estimate(csList, engfd.getChildren()); + Optional res = se.estimate(csList); if (res.isPresent()) { ColStatistics newStats = res.get(); colType = colType.toLowerCase(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java index 16d01adfe063..94aaa32ecfcb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimator.java @@ -22,7 +22,6 @@ import java.util.Optional; import org.apache.hadoop.hive.ql.plan.ColStatistics; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; /** * Enables statistics related computation on UDFs @@ -41,18 +40,4 @@ public interface StatEstimator { * @return {@link ColStatistics} estimate for the actual UDF. */ public Optional estimate(List argStats); - - /** - * Computes the output statistics of the actual UDF with access to original expressions. - * - * This method provides access to the original expression nodes, allowing estimators to make - * more accurate estimates when expressions have special properties (e.g., all constants). - * - * @param argStats the statistics for every argument of the UDF - * @param argExprs the original expression nodes for every argument of the UDF - * @return {@link ColStatistics} estimate for the actual UDF. - */ - default Optional estimate(List argStats, List argExprs) { - return estimate(argStats); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java index a69400ab8662..a8c298f7b02d 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java @@ -28,11 +28,10 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; +import 
org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; /** @@ -49,6 +48,7 @@ public class GenericUDFCoalesce extends GenericUDF implements StatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; + private transient Integer numberOfDistinctConstants; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException { @@ -56,6 +56,9 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen argumentOIs = arguments; returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); + Set distinctConstants = new HashSet<>(); + boolean allConstants = true; + for (int i = 0; i < arguments.length; i++) { if (!returnOIResolver.update(arguments[i])) { throw new UDFArgumentTypeException(i, @@ -64,7 +67,18 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" is expected but \"" + arguments[i].getTypeName() + "\" is found"); } + if (allConstants) { + if (arguments[i] instanceof ConstantObjectInspector) { + distinctConstants.add(((ConstantObjectInspector) arguments[i]).getWritableConstantValue()); + } else { + allConstants = false; + } + } } + + numberOfDistinctConstants = allConstants && !distinctConstants.isEmpty() + ? 
distinctConstants.size() : null; + return returnOIResolver.get(); } @@ -87,41 +101,27 @@ public String getDisplayString(String[] children) { @Override public StatEstimator getStatEstimator() { - return new CoalesceStatEstimator(); + return new CoalesceStatEstimator(numberOfDistinctConstants); } static class CoalesceStatEstimator implements StatEstimator { + private final Integer numberOfDistinctConstants; - @Override - public Optional estimate(List argStats) { - return estimate(argStats, null); + CoalesceStatEstimator(Integer numberOfDistinctConstants) { + this.numberOfDistinctConstants = numberOfDistinctConstants; } @Override - public Optional estimate(List argStats, List argExprs) { - if (argExprs != null && !argExprs.isEmpty()) { - Set distinctConstants = new HashSet<>(); - boolean allConstants = true; - - for (ExprNodeDesc expr : argExprs) { - if (!(expr instanceof ExprNodeConstantDesc)) { - allConstants = false; - break; - } - distinctConstants.add(((ExprNodeConstantDesc) expr).getValue()); - } - - if (allConstants && !distinctConstants.isEmpty()) { - ColStatistics result = argStats.get(0).clone(); - result.setCountDistint(distinctConstants.size()); - result.setIsEstimated(true); - for (int i = 1; i < argStats.size(); i++) { - if (argStats.get(i).getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(argStats.get(i).getAvgColLen()); - } + public Optional estimate(List argStats) { + if (numberOfDistinctConstants != null) { + ColStatistics result = argStats.get(0).clone(); + result.setCountDistint(numberOfDistinctConstants); + for (int i = 1; i < argStats.size(); i++) { + if (argStats.get(i).getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(argStats.get(i).getAvgColLen()); } - return Optional.of(result); } + return Optional.of(result); } // Fall back to pessimistic combining diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index 
7ade70afef53..5b592753680c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -18,10 +18,9 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.util.HashSet; import java.util.List; +import java.util.Objects; import java.util.Optional; -import java.util.Set; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -31,12 +30,11 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; -import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; @@ -129,6 +127,7 @@ public class GenericUDFIf extends GenericUDF implements StatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; + private transient Integer numberOfDistinctConstants; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -161,6 +160,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" and \"" + arguments[2].getTypeName() + "\""); } + if (arguments[1] instanceof ConstantObjectInspector + && 
arguments[2] instanceof ConstantObjectInspector) { + Object thenValue = ((ConstantObjectInspector) arguments[1]).getWritableConstantValue(); + Object elseValue = ((ConstantObjectInspector) arguments[2]).getWritableConstantValue(); + numberOfDistinctConstants = Objects.equals(thenValue, elseValue) ? 1 : 2; + } + return returnOIResolver.get(); } @@ -185,36 +191,25 @@ public String getDisplayString(String[] children) { @Override public StatEstimator getStatEstimator() { - return new IfStatEstimator(); + return new IfStatEstimator(numberOfDistinctConstants); } static class IfStatEstimator implements StatEstimator { + private final Integer numberOfDistinctConstants; - @Override - public Optional estimate(List argStats) { - return estimate(argStats, null); + IfStatEstimator(Integer numberOfDistinctConstants) { + this.numberOfDistinctConstants = numberOfDistinctConstants; } @Override - public Optional estimate(List argStats, List argExprs) { - // argExprs: [condition, thenValue, elseValue] - if (argExprs != null && argExprs.size() == 3) { - ExprNodeDesc thenExpr = argExprs.get(1); - ExprNodeDesc elseExpr = argExprs.get(2); - - if (thenExpr instanceof ExprNodeConstantDesc && elseExpr instanceof ExprNodeConstantDesc) { - Set distinctConstants = new HashSet<>(); - distinctConstants.add(((ExprNodeConstantDesc) thenExpr).getValue()); - distinctConstants.add(((ExprNodeConstantDesc) elseExpr).getValue()); - - ColStatistics result = argStats.get(1).clone(); - result.setCountDistint(distinctConstants.size()); - result.setIsEstimated(true); - if (argStats.get(2).getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(argStats.get(2).getAvgColLen()); - } - return Optional.of(result); + public Optional estimate(List argStats) { + if (numberOfDistinctConstants != null) { + ColStatistics result = argStats.get(1).clone(); + result.setCountDistint(numberOfDistinctConstants); + if (argStats.get(2).getAvgColLen() > result.getAvgColLen()) { + 
result.setAvgColLen(argStats.get(2).getAvgColLen()); } + return Optional.of(result); } // Fall back to pessimistic combining @@ -223,7 +218,6 @@ public Optional estimate(List argStats, List distinctConstants = new HashSet<>(); + boolean allBranchesConstant = true; + for (int i = 0; i + 1 < arguments.length; i += 2) { if (!arguments[i].getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { throw new UDFArgumentTypeException(i, "\"" @@ -83,6 +86,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" is expected but \"" + arguments[i + 1].getTypeName() + "\" is found"); } + if (allBranchesConstant) { + if (arguments[i + 1] instanceof ConstantObjectInspector) { + distinctConstants.add(((ConstantObjectInspector) arguments[i + 1]).getWritableConstantValue()); + } else { + allBranchesConstant = false; + } + } } if (arguments.length % 2 == 1) { int i = arguments.length - 2; @@ -93,8 +103,18 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" is expected but \"" + arguments[i + 1].getTypeName() + "\" is found"); } + if (allBranchesConstant) { + if (arguments[i + 1] instanceof ConstantObjectInspector) { + distinctConstants.add(((ConstantObjectInspector) arguments[i + 1]).getWritableConstantValue()); + } else { + allBranchesConstant = false; + } + } } + numberOfDistinctConstants = allBranchesConstant && !distinctConstants.isEmpty() + ? 
distinctConstants.size() : null; + return returnOIResolver.get(); } @@ -141,57 +161,33 @@ public String getDisplayString(String[] children) { @Override public StatEstimator getStatEstimator() { - return new WhenStatEstimator(); + return new WhenStatEstimator(numberOfDistinctConstants); } static class WhenStatEstimator implements StatEstimator { + private final Integer numberOfDistinctConstants; - @Override - public Optional estimate(List argStats) { - return estimate(argStats, null); + WhenStatEstimator(Integer numberOfDistinctConstants) { + this.numberOfDistinctConstants = numberOfDistinctConstants; } @Override - public Optional estimate(List argStats, List argExprs) { - if (argExprs != null) { - Set distinctConstants = new HashSet<>(); - boolean allConstants = true; - - // Value expressions are at odd indices: 1, 3, 5, ... - for (int i = 1; i < argExprs.size(); i += 2) { - if (!(argExprs.get(i) instanceof ExprNodeConstantDesc)) { - allConstants = false; - break; - } - distinctConstants.add(((ExprNodeConstantDesc) argExprs.get(i)).getValue()); - } - // Check ELSE branch if present (odd number of args) - if (allConstants && argExprs.size() % 2 == 1) { - ExprNodeDesc elseExpr = argExprs.get(argExprs.size() - 1); - if (!(elseExpr instanceof ExprNodeConstantDesc)) { - allConstants = false; - } else { - distinctConstants.add(((ExprNodeConstantDesc) elseExpr).getValue()); + public Optional estimate(List argStats) { + if (numberOfDistinctConstants != null) { + ColStatistics result = argStats.get(1).clone(); + result.setCountDistint(numberOfDistinctConstants); + for (int i = 3; i < argStats.size(); i += 2) { + if (argStats.get(i).getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(argStats.get(i).getAvgColLen()); } } - - if (allConstants && !distinctConstants.isEmpty()) { - ColStatistics result = argStats.get(1).clone(); - result.setCountDistint(distinctConstants.size()); - result.setIsEstimated(true); - for (int i = 3; i < argStats.size(); i += 2) { - if 
(argStats.get(i).getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(argStats.get(i).getAvgColLen()); - } - } - if (argStats.size() % 2 == 1) { - ColStatistics elseStat = argStats.get(argStats.size() - 1); - if (elseStat.getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(elseStat.getAvgColLen()); - } + if (argStats.size() % 2 == 1) { + ColStatistics elseStat = argStats.get(argStats.size() - 1); + if (elseStat.getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(elseStat.getAvgColLen()); } - return Optional.of(result); } + return Optional.of(result); } // Fall back to pessimistic combining diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java new file mode 100644 index 000000000000..f91b93ac2b00 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.jupiter.api.Test; + +class TestGenericUDFCoalesceStatEstimator { + + @Test + void testAllArgumentsConstantDistinctValues() throws UDFArgumentTypeException { + GenericUDFCoalesce udf = new GenericUDFCoalesce(); + + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); + + udf.initialize(new ObjectInspector[]{constA, constB, constC}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("arg1", 100, 10), + createColStats("arg2", 200, 20), + createColStats("arg3", 300, 30))); + + assertTrue(result.isPresent()); + assertEquals(3, result.get().getCountDistint()); + } + + @Test + void testAllArgumentsConstantWithDuplicates() throws UDFArgumentTypeException { + GenericUDFCoalesce udf = new GenericUDFCoalesce(); + + ObjectInspector constA = 
PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constA2 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + + udf.initialize(new ObjectInspector[]{constA, constA2, constB}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("arg1", 100, 10), + createColStats("arg2", 200, 20), + createColStats("arg3", 300, 30))); + + assertTrue(result.isPresent()); + assertEquals(2, result.get().getCountDistint()); + } + + @Test + void testSingleConstantArgument() throws UDFArgumentTypeException { + GenericUDFCoalesce udf = new GenericUDFCoalesce(); + + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + + udf.initialize(new ObjectInspector[]{constA}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("arg1", 100, 10))); + + assertTrue(result.isPresent()); + assertEquals(1, result.get().getCountDistint()); + } + + @Test + void testNonConstantArgumentFallsBackToPessimisticCombiner() throws UDFArgumentTypeException { + GenericUDFCoalesce udf = new GenericUDFCoalesce(); + + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); + + udf.initialize(new 
ObjectInspector[]{constA, nonConst, constC}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("arg1", 100, 10), + createColStats("arg2", 200, 20), + createColStats("arg3", 300, 30))); + + assertTrue(result.isPresent()); + assertEquals(0, result.get().getCountDistint()); + } + + @Test + void testAllNonConstantArguments() throws UDFArgumentTypeException { + GenericUDFCoalesce udf = new GenericUDFCoalesce(); + + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + udf.initialize(new ObjectInspector[]{nonConst, nonConst, nonConst}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("arg1", 100, 10), + createColStats("arg2", 200, 20), + createColStats("arg3", 300, 30))); + + assertTrue(result.isPresent()); + assertEquals(0, result.get().getCountDistint()); + } + + @Test + void testConstantArgumentsTakesMaxAvgColLen() throws UDFArgumentTypeException { + GenericUDFCoalesce udf = new GenericUDFCoalesce(); + + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); + + udf.initialize(new ObjectInspector[]{constA, constB, constC}); + + StatEstimator estimator = udf.getStatEstimator(); + + ColStatistics arg1Stats = createColStats("arg1", 100, 10); + arg1Stats.setAvgColLen(5.0); + ColStatistics arg2Stats = createColStats("arg2", 200, 20); + arg2Stats.setAvgColLen(25.0); + ColStatistics arg3Stats = createColStats("arg3", 300, 30); + arg3Stats.setAvgColLen(15.0); + + Optional result = 
estimator.estimate(Arrays.asList(arg1Stats, arg2Stats, arg3Stats)); + + assertTrue(result.isPresent()); + assertEquals(25.0, result.get().getAvgColLen()); + } + + private ColStatistics createColStats(String name, long ndv, long numNulls) { + ColStatistics cs = new ColStatistics(name, "string"); + cs.setCountDistint(ndv); + cs.setNumNulls(numNulls); + cs.setAvgColLen(10.0); + return cs; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java new file mode 100644 index 000000000000..d3ada4d033a2 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java @@ -0,0 +1,160 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.jupiter.api.Test; + +class TestGenericUDFIfStatEstimator { + + @Test + void testBothBranchesConstantDistinctValues() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector thenOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector elseOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + ColStatistics thenStats = createColStats("then_col", 100, 10); + ColStatistics elseStats = createColStats("else_col", 200, 20); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), thenStats, elseStats)); + + assertTrue(result.isPresent()); + assertEquals(2, result.get().getCountDistint()); + } + + @Test + void testBothBranchesConstantSameValue() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector 
thenOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector elseOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), + createColStats("then_col", 100, 10), + createColStats("else_col", 200, 20))); + + assertTrue(result.isPresent()); + assertEquals(1, result.get().getCountDistint()); + } + + @Test + void testNonConstantThenBranchFallsBackToPessimisticCombiner() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector thenOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector elseOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), + createColStats("then_col", 100, 10), + createColStats("else_col", 200, 20))); + + assertTrue(result.isPresent()); + assertEquals(0, result.get().getCountDistint()); + } + + @Test + void testNonConstantElseBranchFallsBackToPessimisticCombiner() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector thenOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector elseOI = 
PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), + createColStats("then_col", 100, 10), + createColStats("else_col", 200, 20))); + + assertTrue(result.isPresent()); + assertEquals(0, result.get().getCountDistint()); + } + + @Test + void testConstantBranchesTakesMaxAvgColLen() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector thenOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector elseOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + ColStatistics thenStats = createColStats("then_col", 100, 10); + thenStats.setAvgColLen(5.0); + ColStatistics elseStats = createColStats("else_col", 200, 20); + elseStats.setAvgColLen(15.0); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), thenStats, elseStats)); + + assertTrue(result.isPresent()); + assertEquals(15.0, result.get().getAvgColLen()); + } + + private ColStatistics createColStats(String name, long ndv, long numNulls) { + ColStatistics cs = new ColStatistics(name, "string"); + cs.setCountDistint(ndv); + cs.setNumNulls(numNulls); + cs.setAvgColLen(10.0); + return cs; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFWhenStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFWhenStatEstimator.java new file mode 100644 index 000000000000..b83f68ea0de0 --- /dev/null 
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFWhenStatEstimator.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.Text; +import org.junit.jupiter.api.Test; + +class TestGenericUDFWhenStatEstimator { + + @Test + void testAllBranchesConstantDistinctValues() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = 
PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); + + // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN 'B' ELSE 'C' END + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB, constC}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 100, 10), + createColStats("cond2", 2, 0), + createColStats("then2", 200, 20), + createColStats("else", 300, 30))); + + assertTrue(result.isPresent()); + assertEquals(3, result.get().getCountDistint()); + } + + @Test + void testAllBranchesConstantWithDuplicates() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constA2 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + + // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN 'A' ELSE 'B' END + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constA2, constB}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 100, 10), + 
createColStats("cond2", 2, 0), + createColStats("then2", 200, 20), + createColStats("else", 300, 30))); + + assertTrue(result.isPresent()); + assertEquals(2, result.get().getCountDistint()); + } + + @Test + void testWithoutElseBranch() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + + // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN 'B' END (no ELSE) + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 100, 10), + createColStats("cond2", 2, 0), + createColStats("then2", 200, 20))); + + assertTrue(result.isPresent()); + assertEquals(2, result.get().getCountDistint()); + } + + @Test + void testNonConstantBranchFallsBackToPessimisticCombiner() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); + + // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN col ELSE 'C' END + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, nonConst, constC}); + + 
StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 100, 10), + createColStats("cond2", 2, 0), + createColStats("then2", 200, 20), + createColStats("else", 300, 30))); + + assertTrue(result.isPresent()); + assertEquals(0, result.get().getCountDistint()); + } + + @Test + void testNonConstantElseFallsBackToPessimisticCombiner() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN 'B' ELSE col END + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB, nonConst}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 100, 10), + createColStats("cond2", 2, 0), + createColStats("then2", 200, 20), + createColStats("else", 300, 30))); + + assertTrue(result.isPresent()); + assertEquals(0, result.get().getCountDistint()); + } + + @Test + void testConstantBranchesTakesMaxAvgColLen() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = 
PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); + + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB, constC}); + + StatEstimator estimator = udf.getStatEstimator(); + + ColStatistics then1Stats = createColStats("then1", 100, 10); + then1Stats.setAvgColLen(5.0); + ColStatistics then2Stats = createColStats("then2", 200, 20); + then2Stats.setAvgColLen(25.0); + ColStatistics elseStats = createColStats("else", 300, 30); + elseStats.setAvgColLen(15.0); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + then1Stats, + createColStats("cond2", 2, 0), + then2Stats, + elseStats)); + + assertTrue(result.isPresent()); + assertEquals(25.0, result.get().getAvgColLen()); + } + + private ColStatistics createColStats(String name, long ndv, long numNulls) { + ColStatistics cs = new ColStatistics(name, "string"); + cs.setCountDistint(ndv); + cs.setNumNulls(numNulls); + cs.setAvgColLen(10.0); + return cs; + } +} From bb7c3fd97baa659777fe73295b23d5168b4a87fc Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 5 Feb 2026 14:40:10 -0800 Subject: [PATCH 11/14] HIVE-29368: further refactoring constant NDV estimates and PessimisticStatCombiner to use more accurate stats while still falling back to "unknown NDV" when identified --- .../estimator/BranchingStatEstimator.java | 50 + .../estimator/PessimisticStatCombiner.java | 47 +- .../ql/udf/generic/GenericUDFCoalesce.java | 46 +- .../hive/ql/udf/generic/GenericUDFIf.java | 37 +- .../hive/ql/udf/generic/GenericUDFWhen.java | 54 +- .../estimator/TestBranchingStatEstimator.java | 150 ++ .../TestPessimisticStatCombiner.java | 32 +- .../TestGenericUDFCoalesceStatEstimator.java | 20 +- .../TestGenericUDFIfStatEstimator.java | 85 +- 
.../TestGenericUDFWhenStatEstimator.java | 215 ++- .../clientpositive/branching_expr_ndv.q | 75 + .../llap/branching_expr_ndv.q.out | 1553 +++++++++++++++++ 12 files changed, 2199 insertions(+), 165 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/BranchingStatEstimator.java create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestBranchingStatEstimator.java create mode 100644 ql/src/test/queries/clientpositive/branching_expr_ndv.q create mode 100644 ql/src/test/results/clientpositive/llap/branching_expr_ndv.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/BranchingStatEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/BranchingStatEstimator.java new file mode 100644 index 000000000000..2fe6bd2ce35c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/BranchingStatEstimator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.stats.estimator; + +import java.util.List; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +/** + * Base class for StatEstimators that handle branching expressions (CASE/WHEN, IF, COALESCE). + * Combines branch statistics using PessimisticStatCombiner and accounts for distinct constants. + */ +public abstract class BranchingStatEstimator implements StatEstimator { + protected final int numberOfDistinctConstants; + + protected BranchingStatEstimator(int numberOfDistinctConstants) { + this.numberOfDistinctConstants = numberOfDistinctConstants; + } + + @Override + public Optional estimate(List argStats) { + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + addBranchStats(combiner, argStats); + if (numberOfDistinctConstants > 1) { + ColStatistics constantsStat = new ColStatistics("_constants", "string"); + constantsStat.setCountDistint(numberOfDistinctConstants); + combiner.add(constantsStat); + } + return combiner.getResult(); + } + + protected abstract void addBranchStats(PessimisticStatCombiner combiner, List argStats); +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java index 6e9a89a9d4b6..bd1ac2307f21 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java @@ -37,37 +37,30 @@ public void add(ColStatistics stat) { result.setRange(null); result.setIsEstimated(true); return; - } else { - if (stat.getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(stat.getAvgColLen()); - } - - // NDVs can only be accurately combined if full information about columns, query branches and - // their relationships is available. 
Without that info, there is only one "truly conservative" - // value of NDV which is 0, which means that the NDV is unknown. It forces optimizer - // to make the most conservative decisions possible, which is the exact goal of - // PessimisticStatCombiner. It does inflate statistics in multiple cases, but at the same time it - // also ensures than the query execution does not "blow up" due to too optimistic stats estimates + } + if (stat.getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(stat.getAvgColLen()); + } + if (stat.getCountDistint() == 0 || result.getCountDistint() == 0) { result.setCountDistint(0L); - - if (stat.getNumNulls() > result.getNumNulls()) { - result.setNumNulls(stat.getNumNulls()); - } - if (stat.getNumTrues() > result.getNumTrues()) { - result.setNumTrues(stat.getNumTrues()); - } - if (stat.getNumFalses() > result.getNumFalses()) { - result.setNumFalses(stat.getNumFalses()); - } - if (stat.isFilteredColumn()) { - result.setFilterColumn(); - } - + } else if (stat.getCountDistint() > result.getCountDistint()) { + result.setCountDistint(stat.getCountDistint()); + } + if (stat.getNumNulls() > result.getNumNulls()) { + result.setNumNulls(stat.getNumNulls()); + } + if (stat.getNumTrues() > result.getNumTrues()) { + result.setNumTrues(stat.getNumTrues()); + } + if (stat.getNumFalses() > result.getNumFalses()) { + result.setNumFalses(stat.getNumFalses()); + } + if (stat.isFilteredColumn()) { + result.setFilterColumn(); } - } + public Optional getResult() { return Optional.of(result); - } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java index a8c298f7b02d..600c81908e46 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java @@ -20,7 +20,6 @@ import java.util.HashSet; import java.util.List; -import java.util.Optional; 
import java.util.Set; import org.apache.hadoop.hive.ql.exec.Description; @@ -28,9 +27,10 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.BranchingStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; -import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -48,7 +48,7 @@ public class GenericUDFCoalesce extends GenericUDF implements StatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; - private transient Integer numberOfDistinctConstants; + private transient int numberOfDistinctConstants; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException { @@ -57,7 +57,6 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); Set distinctConstants = new HashSet<>(); - boolean allConstants = true; for (int i = 0; i < arguments.length; i++) { if (!returnOIResolver.update(arguments[i])) { @@ -67,17 +66,12 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" is expected but \"" + arguments[i].getTypeName() + "\" is found"); } - if (allConstants) { - if (arguments[i] instanceof ConstantObjectInspector) { - distinctConstants.add(((ConstantObjectInspector) arguments[i]).getWritableConstantValue()); - } else { - allConstants = false; - } + if (arguments[i] instanceof 
ConstantObjectInspector) { + distinctConstants.add(((ConstantObjectInspector) arguments[i]).getWritableConstantValue()); } } - numberOfDistinctConstants = allConstants && !distinctConstants.isEmpty() - ? distinctConstants.size() : null; + numberOfDistinctConstants = distinctConstants.size(); return returnOIResolver.get(); } @@ -104,32 +98,16 @@ public StatEstimator getStatEstimator() { return new CoalesceStatEstimator(numberOfDistinctConstants); } - static class CoalesceStatEstimator implements StatEstimator { - private final Integer numberOfDistinctConstants; - - CoalesceStatEstimator(Integer numberOfDistinctConstants) { - this.numberOfDistinctConstants = numberOfDistinctConstants; + static class CoalesceStatEstimator extends BranchingStatEstimator { + CoalesceStatEstimator(int numberOfDistinctConstants) { + super(numberOfDistinctConstants); } @Override - public Optional estimate(List argStats) { - if (numberOfDistinctConstants != null) { - ColStatistics result = argStats.get(0).clone(); - result.setCountDistint(numberOfDistinctConstants); - for (int i = 1; i < argStats.size(); i++) { - if (argStats.get(i).getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(argStats.get(i).getAvgColLen()); - } - } - return Optional.of(result); - } - - // Fall back to pessimistic combining - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); - for (int i = 0; i < argStats.size(); i++) { - combiner.add(argStats.get(i)); + protected void addBranchStats(PessimisticStatCombiner combiner, List argStats) { + for (ColStatistics argStat : argStats) { + combiner.add(argStat); } - return combiner.getResult(); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index 5b592753680c..e9004b7a2b6e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -20,7 +20,6 @@ 
import java.util.List; import java.util.Objects; -import java.util.Optional; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -30,9 +29,10 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.BranchingStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; -import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -127,7 +127,7 @@ public class GenericUDFIf extends GenericUDF implements StatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; - private transient Integer numberOfDistinctConstants; + private transient int numberOfDistinctConstants; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { @@ -160,11 +160,16 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" and \"" + arguments[2].getTypeName() + "\""); } - if (arguments[1] instanceof ConstantObjectInspector - && arguments[2] instanceof ConstantObjectInspector) { + boolean thenIsConstant = arguments[1] instanceof ConstantObjectInspector; + boolean elseIsConstant = arguments[2] instanceof ConstantObjectInspector; + if (thenIsConstant && elseIsConstant) { Object thenValue = ((ConstantObjectInspector) arguments[1]).getWritableConstantValue(); Object elseValue = ((ConstantObjectInspector) 
arguments[2]).getWritableConstantValue(); numberOfDistinctConstants = Objects.equals(thenValue, elseValue) ? 1 : 2; + } else if (thenIsConstant || elseIsConstant) { + numberOfDistinctConstants = 1; + } else { + numberOfDistinctConstants = 0; } return returnOIResolver.get(); @@ -194,29 +199,15 @@ public StatEstimator getStatEstimator() { return new IfStatEstimator(numberOfDistinctConstants); } - static class IfStatEstimator implements StatEstimator { - private final Integer numberOfDistinctConstants; - - IfStatEstimator(Integer numberOfDistinctConstants) { - this.numberOfDistinctConstants = numberOfDistinctConstants; + static class IfStatEstimator extends BranchingStatEstimator { + IfStatEstimator(int numberOfDistinctConstants) { + super(numberOfDistinctConstants); } @Override - public Optional estimate(List argStats) { - if (numberOfDistinctConstants != null) { - ColStatistics result = argStats.get(1).clone(); - result.setCountDistint(numberOfDistinctConstants); - if (argStats.get(2).getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(argStats.get(2).getAvgColLen()); - } - return Optional.of(result); - } - - // Fall back to pessimistic combining - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + protected void addBranchStats(PessimisticStatCombiner combiner, List argStats) { combiner.add(argStats.get(1)); combiner.add(argStats.get(2)); - return combiner.getResult(); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java index 31776d76156f..bcbebfe931ac 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFWhen.java @@ -20,13 +20,13 @@ import java.util.HashSet; import java.util.List; -import java.util.Optional; import java.util.Set; import org.apache.hadoop.hive.ql.exec.Description; import 
org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.BranchingStatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; @@ -62,7 +62,7 @@ public class GenericUDFWhen extends GenericUDF implements StatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; - private transient Integer numberOfDistinctConstants; + private transient int numberOfDistinctConstants; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException { @@ -71,7 +71,6 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); Set distinctConstants = new HashSet<>(); - boolean allBranchesConstant = true; for (int i = 0; i + 1 < arguments.length; i += 2) { if (!arguments[i].getTypeName().equals(serdeConstants.BOOLEAN_TYPE_NAME)) { @@ -86,12 +85,8 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" is expected but \"" + arguments[i + 1].getTypeName() + "\" is found"); } - if (allBranchesConstant) { - if (arguments[i + 1] instanceof ConstantObjectInspector) { - distinctConstants.add(((ConstantObjectInspector) arguments[i + 1]).getWritableConstantValue()); - } else { - allBranchesConstant = false; - } + if (arguments[i + 1] instanceof ConstantObjectInspector) { + distinctConstants.add(((ConstantObjectInspector) arguments[i + 1]).getWritableConstantValue()); } } if (arguments.length % 2 == 1) { @@ -103,17 +98,12 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" is 
expected but \"" + arguments[i + 1].getTypeName() + "\" is found"); } - if (allBranchesConstant) { - if (arguments[i + 1] instanceof ConstantObjectInspector) { - distinctConstants.add(((ConstantObjectInspector) arguments[i + 1]).getWritableConstantValue()); - } else { - allBranchesConstant = false; - } + if (arguments[i + 1] instanceof ConstantObjectInspector) { + distinctConstants.add(((ConstantObjectInspector) arguments[i + 1]).getWritableConstantValue()); } } - numberOfDistinctConstants = allBranchesConstant && !distinctConstants.isEmpty() - ? distinctConstants.size() : null; + numberOfDistinctConstants = distinctConstants.size(); return returnOIResolver.get(); } @@ -164,41 +154,19 @@ public StatEstimator getStatEstimator() { return new WhenStatEstimator(numberOfDistinctConstants); } - static class WhenStatEstimator implements StatEstimator { - private final Integer numberOfDistinctConstants; - - WhenStatEstimator(Integer numberOfDistinctConstants) { - this.numberOfDistinctConstants = numberOfDistinctConstants; + static class WhenStatEstimator extends BranchingStatEstimator { + WhenStatEstimator(int numberOfDistinctConstants) { + super(numberOfDistinctConstants); } @Override - public Optional estimate(List argStats) { - if (numberOfDistinctConstants != null) { - ColStatistics result = argStats.get(1).clone(); - result.setCountDistint(numberOfDistinctConstants); - for (int i = 3; i < argStats.size(); i += 2) { - if (argStats.get(i).getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(argStats.get(i).getAvgColLen()); - } - } - if (argStats.size() % 2 == 1) { - ColStatistics elseStat = argStats.get(argStats.size() - 1); - if (elseStat.getAvgColLen() > result.getAvgColLen()) { - result.setAvgColLen(elseStat.getAvgColLen()); - } - } - return Optional.of(result); - } - - // Fall back to pessimistic combining - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + protected void addBranchStats(PessimisticStatCombiner combiner, List argStats) { 
for (int i = 1; i < argStats.size(); i += 2) { combiner.add(argStats.get(i)); } if (argStats.size() % 2 == 1) { combiner.add(argStats.get(argStats.size() - 1)); } - return combiner.getResult(); } } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestBranchingStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestBranchingStatEstimator.java new file mode 100644 index 000000000000..b332a37cf50a --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestBranchingStatEstimator.java @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.stats.estimator; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.Arrays; +import java.util.List; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.junit.jupiter.api.Test; + +class TestBranchingStatEstimator { + + @Test + void testMultipleDistinctConstantsDominates() { + BranchingStatEstimator estimator = new SimpleBranchingStatEstimator(5); + + Optional result = estimator.estimate(Arrays.asList( + createColStats(1), + createColStats(1), + createColStats(1), + createColStats(1), + createColStats(1), + createColStats(3))); + + assertTrue(result.isPresent()); + assertEquals(5, result.get().getCountDistint()); + } + + @Test + void testColumnNdvDominates() { + BranchingStatEstimator estimator = new SimpleBranchingStatEstimator(2); + + Optional result = estimator.estimate(Arrays.asList( + createColStats(1), + createColStats(1), + createColStats(100))); + + assertTrue(result.isPresent()); + assertEquals(100, result.get().getCountDistint()); + } + + @Test + void testSingleConstantNoSyntheticAdded() { + BranchingStatEstimator estimator = new SimpleBranchingStatEstimator(1); + + Optional result = estimator.estimate(Arrays.asList( + createColStats(1), + createColStats(50))); + + assertTrue(result.isPresent()); + assertEquals(50, result.get().getCountDistint()); + } + + @Test + void testNoConstantsNoSyntheticAdded() { + BranchingStatEstimator estimator = new SimpleBranchingStatEstimator(0); + + Optional result = estimator.estimate(Arrays.asList( + createColStats(100), + createColStats(200))); + + assertTrue(result.isPresent()); + assertEquals(200, result.get().getCountDistint()); + } + + @Test + void testUnknownNdvPropagates() { + BranchingStatEstimator estimator = new SimpleBranchingStatEstimator(3); + + Optional result = estimator.estimate(Arrays.asList( + createColStats(1), + 
createColStats(1), + createColStats(1), + createColStats(0))); + + assertTrue(result.isPresent()); + assertEquals(0, result.get().getCountDistint()); + } + + @Test + void testAllConstantsKnown() { + BranchingStatEstimator estimator = new SimpleBranchingStatEstimator(3); + + Optional result = estimator.estimate(Arrays.asList( + createColStats(1), + createColStats(1), + createColStats(1))); + + assertTrue(result.isPresent()); + assertEquals(3, result.get().getCountDistint()); + } + + @Test + void testMaxAvgColLenPreserved() { + BranchingStatEstimator estimator = new SimpleBranchingStatEstimator(2); + + ColStatistics stat1 = createColStats(1); + stat1.setAvgColLen(10.0); + ColStatistics stat2 = createColStats(1); + stat2.setAvgColLen(25.0); + ColStatistics stat3 = createColStats(50); + stat3.setAvgColLen(15.0); + + Optional result = estimator.estimate(Arrays.asList(stat1, stat2, stat3)); + + assertTrue(result.isPresent()); + assertEquals(25.0, result.get().getAvgColLen()); + } + + private ColStatistics createColStats(long ndv) { + ColStatistics cs = new ColStatistics("col", "string"); + cs.setCountDistint(ndv); + cs.setNumNulls(0); + cs.setAvgColLen(10.0); + return cs; + } + + static class SimpleBranchingStatEstimator extends BranchingStatEstimator { + SimpleBranchingStatEstimator(int numberOfDistinctConstants) { + super(numberOfDistinctConstants); + } + + @Override + protected void addBranchStats(PessimisticStatCombiner combiner, List argStats) { + for (ColStatistics stat : argStats) { + combiner.add(stat); + } + } + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java index b07fcec0f522..9cfc561a5358 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java @@ -78,7 +78,7 @@ void 
testCombineTakesMaxOfNumNulls() { } @Test - void testCombineSetsCountDistinctToZero() { + void testCombineTakesMaxOfNdv() { ColStatistics stat1 = createStat("col1", "int", 100, 10, 4.0); ColStatistics stat2 = createStat("col2", "int", 200, 20, 4.0); @@ -86,6 +86,32 @@ void testCombineSetsCountDistinctToZero() { combiner.add(stat1); combiner.add(stat2); + ColStatistics combined = combiner.getResult().get(); + assertEquals(200, combined.getCountDistint()); + } + + @Test + void testCombineWithUnknownNdvReturnsZero() { + ColStatistics stat1 = createStat("col1", "int", 100, 10, 4.0); + ColStatistics stat2 = createStat("col2", "int", 0, 20, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + + ColStatistics combined = combiner.getResult().get(); + assertEquals(0, combined.getCountDistint()); + } + + @Test + void testCombineWithFirstStatUnknownNdvReturnsZero() { + ColStatistics stat1 = createStat("col1", "int", 0, 10, 4.0); + ColStatistics stat2 = createStat("col2", "int", 200, 20, 4.0); + + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + combiner.add(stat1); + combiner.add(stat2); + ColStatistics combined = combiner.getResult().get(); assertEquals(0, combined.getCountDistint()); } @@ -135,7 +161,7 @@ void testCombineMultipleStats() { combiner.add(stat3); ColStatistics combined = combiner.getResult().get(); - assertEquals(0, combined.getCountDistint()); + assertEquals(2000, combined.getCountDistint()); assertEquals(100, combined.getNumNulls()); assertEquals(8.0, combined.getAvgColLen()); } @@ -149,7 +175,7 @@ void testCombineSameColumnTwice() { combiner.add(stat); ColStatistics combined = combiner.getResult().get(); - assertEquals(0, combined.getCountDistint()); + assertEquals(100, combined.getCountDistint()); assertEquals(10, combined.getNumNulls()); assertEquals(4.0, combined.getAvgColLen()); } diff --git 
a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java index f91b93ac2b00..49fc1a8a4a83 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java @@ -51,9 +51,9 @@ void testAllArgumentsConstantDistinctValues() throws UDFArgumentTypeException { StatEstimator estimator = udf.getStatEstimator(); Optional result = estimator.estimate(Arrays.asList( - createColStats("arg1", 100, 10), - createColStats("arg2", 200, 20), - createColStats("arg3", 300, 30))); + createColStats("arg1", 1, 0), + createColStats("arg2", 1, 0), + createColStats("arg3", 1, 0))); assertTrue(result.isPresent()); assertEquals(3, result.get().getCountDistint()); @@ -75,9 +75,9 @@ void testAllArgumentsConstantWithDuplicates() throws UDFArgumentTypeException { StatEstimator estimator = udf.getStatEstimator(); Optional result = estimator.estimate(Arrays.asList( - createColStats("arg1", 100, 10), - createColStats("arg2", 200, 20), - createColStats("arg3", 300, 30))); + createColStats("arg1", 1, 0), + createColStats("arg2", 1, 0), + createColStats("arg3", 1, 0))); assertTrue(result.isPresent()); assertEquals(2, result.get().getCountDistint()); @@ -95,14 +95,14 @@ void testSingleConstantArgument() throws UDFArgumentTypeException { StatEstimator estimator = udf.getStatEstimator(); Optional result = estimator.estimate(Arrays.asList( - createColStats("arg1", 100, 10))); + createColStats("arg1", 1, 0))); assertTrue(result.isPresent()); assertEquals(1, result.get().getCountDistint()); } @Test - void testNonConstantArgumentFallsBackToPessimisticCombiner() throws UDFArgumentTypeException { + void testMixedConstantAndNonConstantArguments() throws UDFArgumentTypeException { GenericUDFCoalesce udf = new GenericUDFCoalesce(); ObjectInspector constA = 
PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( @@ -121,7 +121,7 @@ void testNonConstantArgumentFallsBackToPessimisticCombiner() throws UDFArgumentT createColStats("arg3", 300, 30))); assertTrue(result.isPresent()); - assertEquals(0, result.get().getCountDistint()); + assertEquals(300, result.get().getCountDistint()); } @Test @@ -140,7 +140,7 @@ void testAllNonConstantArguments() throws UDFArgumentTypeException { createColStats("arg3", 300, 30))); assertTrue(result.isPresent()); - assertEquals(0, result.get().getCountDistint()); + assertEquals(300, result.get().getCountDistint()); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java index d3ada4d033a2..83d3acff9798 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java @@ -48,11 +48,11 @@ void testBothBranchesConstantDistinctValues() throws UDFArgumentException { udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); StatEstimator estimator = udf.getStatEstimator(); - ColStatistics thenStats = createColStats("then_col", 100, 10); - ColStatistics elseStats = createColStats("else_col", 200, 20); Optional result = estimator.estimate( - Arrays.asList(createColStats("cond", 2, 0), thenStats, elseStats)); + Arrays.asList(createColStats("cond", 2, 0), + createColStats("then_col", 1, 0), + createColStats("else_col", 1, 0))); assertTrue(result.isPresent()); assertEquals(2, result.get().getCountDistint()); @@ -74,15 +74,15 @@ void testBothBranchesConstantSameValue() throws UDFArgumentException { Optional result = estimator.estimate( Arrays.asList(createColStats("cond", 2, 0), - createColStats("then_col", 100, 10), - createColStats("else_col", 200, 20))); + createColStats("then_col", 1, 0), + 
createColStats("else_col", 1, 0))); assertTrue(result.isPresent()); assertEquals(1, result.get().getCountDistint()); } @Test - void testNonConstantThenBranchFallsBackToPessimisticCombiner() throws UDFArgumentException { + void testNonConstantThenBranchWithConstantElse() throws UDFArgumentException { GenericUDFIf udf = new GenericUDFIf(); ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; @@ -100,11 +100,11 @@ void testNonConstantThenBranchFallsBackToPessimisticCombiner() throws UDFArgumen createColStats("else_col", 200, 20))); assertTrue(result.isPresent()); - assertEquals(0, result.get().getCountDistint()); + assertEquals(200, result.get().getCountDistint()); } @Test - void testNonConstantElseBranchFallsBackToPessimisticCombiner() throws UDFArgumentException { + void testConstantThenBranchWithNonConstantElse() throws UDFArgumentException { GenericUDFIf udf = new GenericUDFIf(); ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; @@ -122,7 +122,74 @@ void testNonConstantElseBranchFallsBackToPessimisticCombiner() throws UDFArgumen createColStats("else_col", 200, 20))); assertTrue(result.isPresent()); - assertEquals(0, result.get().getCountDistint()); + assertEquals(200, result.get().getCountDistint()); + } + + @Test + void testBothBranchesNonConstant() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector thenOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector elseOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), + createColStats("then_col", 100, 10), + createColStats("else_col", 200, 20))); 
+ + assertTrue(result.isPresent()); + assertEquals(200, result.get().getCountDistint()); + } + + @Test + void testNullAndNonNullConstants() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector thenOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, null); + ObjectInspector elseOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), + createColStats("then_col", 1, 0), + createColStats("else_col", 1, 0))); + + assertTrue(result.isPresent()); + assertEquals(2, result.get().getCountDistint()); + } + + @Test + void testBothNullConstants() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector thenOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, null); + ObjectInspector elseOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, null); + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), + createColStats("then_col", 1, 0), + createColStats("else_col", 1, 0))); + + assertTrue(result.isPresent()); + assertEquals(1, result.get().getCountDistint()); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFWhenStatEstimator.java 
b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFWhenStatEstimator.java index b83f68ea0de0..26e82b935b67 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFWhenStatEstimator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFWhenStatEstimator.java @@ -47,17 +47,16 @@ void testAllBranchesConstantDistinctValues() throws UDFArgumentTypeException { ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( TypeInfoFactory.stringTypeInfo, new Text("C")); - // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN 'B' ELSE 'C' END udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB, constC}); StatEstimator estimator = udf.getStatEstimator(); Optional result = estimator.estimate(Arrays.asList( createColStats("cond1", 2, 0), - createColStats("then1", 100, 10), + createColStats("then1", 1, 0), createColStats("cond2", 2, 0), - createColStats("then2", 200, 20), - createColStats("else", 300, 30))); + createColStats("then2", 1, 0), + createColStats("else", 1, 0))); assertTrue(result.isPresent()); assertEquals(3, result.get().getCountDistint()); @@ -75,17 +74,16 @@ void testAllBranchesConstantWithDuplicates() throws UDFArgumentTypeException { ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( TypeInfoFactory.stringTypeInfo, new Text("B")); - // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN 'A' ELSE 'B' END udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constA2, constB}); StatEstimator estimator = udf.getStatEstimator(); Optional result = estimator.estimate(Arrays.asList( createColStats("cond1", 2, 0), - createColStats("then1", 100, 10), + createColStats("then1", 1, 0), createColStats("cond2", 2, 0), - createColStats("then2", 200, 20), - createColStats("else", 300, 30))); + createColStats("then2", 1, 0), + createColStats("else", 1, 0))); assertTrue(result.isPresent()); assertEquals(2, 
result.get().getCountDistint()); @@ -101,23 +99,22 @@ void testWithoutElseBranch() throws UDFArgumentTypeException { ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( TypeInfoFactory.stringTypeInfo, new Text("B")); - // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN 'B' END (no ELSE) udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB}); StatEstimator estimator = udf.getStatEstimator(); Optional result = estimator.estimate(Arrays.asList( createColStats("cond1", 2, 0), - createColStats("then1", 100, 10), + createColStats("then1", 1, 0), createColStats("cond2", 2, 0), - createColStats("then2", 200, 20))); + createColStats("then2", 1, 0))); assertTrue(result.isPresent()); assertEquals(2, result.get().getCountDistint()); } @Test - void testNonConstantBranchFallsBackToPessimisticCombiner() throws UDFArgumentTypeException { + void testMixedConstantAndNonConstantBranches() throws UDFArgumentTypeException { GenericUDFWhen udf = new GenericUDFWhen(); ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; @@ -127,7 +124,6 @@ void testNonConstantBranchFallsBackToPessimisticCombiner() throws UDFArgumentTyp ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( TypeInfoFactory.stringTypeInfo, new Text("C")); - // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN col ELSE 'C' END udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, nonConst, constC}); StatEstimator estimator = udf.getStatEstimator(); @@ -140,11 +136,11 @@ void testNonConstantBranchFallsBackToPessimisticCombiner() throws UDFArgumentTyp createColStats("else", 300, 30))); assertTrue(result.isPresent()); - assertEquals(0, result.get().getCountDistint()); + assertEquals(300, result.get().getCountDistint()); } @Test - void testNonConstantElseFallsBackToPessimisticCombiner() throws UDFArgumentTypeException { + void testNonConstantElseWithConstantBranches() throws 
UDFArgumentTypeException { GenericUDFWhen udf = new GenericUDFWhen(); ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; @@ -154,7 +150,6 @@ void testNonConstantElseFallsBackToPessimisticCombiner() throws UDFArgumentTypeE TypeInfoFactory.stringTypeInfo, new Text("B")); ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; - // CASE WHEN cond1 THEN 'A' WHEN cond2 THEN 'B' ELSE col END udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB, nonConst}); StatEstimator estimator = udf.getStatEstimator(); @@ -166,10 +161,198 @@ void testNonConstantElseFallsBackToPessimisticCombiner() throws UDFArgumentTypeE createColStats("then2", 200, 20), createColStats("else", 300, 30))); + assertTrue(result.isPresent()); + assertEquals(300, result.get().getCountDistint()); + } + + @Test + void testConstantsWithComplexExpression() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector complexExprOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, complexExprOI, constB}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 1, 0), + createColStats("cond2", 2, 0), + createColStats("complex_expr", 500, 0), + createColStats("else", 1, 0))); + + assertTrue(result.isPresent()); + assertEquals(500, result.get().getCountDistint()); + } + + @Test + void 
testConstantsWithSmallComplexExpression() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); + ObjectInspector complexExprOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB, boolOI, complexExprOI, constC}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 1, 0), + createColStats("cond2", 2, 0), + createColStats("then2", 1, 0), + createColStats("cond3", 2, 0), + createColStats("complex_expr", 2, 0), + createColStats("else", 1, 0))); + + assertTrue(result.isPresent()); + assertEquals(3, result.get().getCountDistint()); + } + + @Test + void testConstantsWithUnknownNdvColumn() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + udf.initialize(new ObjectInspector[]{boolOI, 
constA, boolOI, constB, nonConst}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 1, 0), + createColStats("cond2", 2, 0), + createColStats("then2", 1, 0), + createColStats("else", 0, 0))); + assertTrue(result.isPresent()); assertEquals(0, result.get().getCountDistint()); } + @Test + void testManyConstantsWithSmallNdvColumn() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("B")); + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); + ObjectInspector constD = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("D")); + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + udf.initialize(new ObjectInspector[]{boolOI, constA, boolOI, constB, boolOI, constC, boolOI, constD, nonConst}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 1, 0), + createColStats("cond2", 2, 0), + createColStats("then2", 1, 0), + createColStats("cond3", 2, 0), + createColStats("then3", 1, 0), + createColStats("cond4", 2, 0), + createColStats("then4", 1, 0), + createColStats("else", 2, 0))); + + assertTrue(result.isPresent()); + assertEquals(4, result.get().getCountDistint()); + } + + @Test + void testAllBranchesNonConstant() throws 
UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + + udf.initialize(new ObjectInspector[]{boolOI, nonConst, boolOI, nonConst, nonConst}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("col1", 100, 0), + createColStats("cond2", 2, 0), + createColStats("col2", 200, 0), + createColStats("col3", 300, 0))); + + assertTrue(result.isPresent()); + assertEquals(300, result.get().getCountDistint()); + } + + @Test + void testNullConstantWithNonNullConstants() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constNull = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, null); + ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector constNull2 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, null); + + udf.initialize(new ObjectInspector[]{boolOI, constNull, boolOI, constA, constNull2}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 1, 0), + createColStats("cond2", 2, 0), + createColStats("then2", 1, 0), + createColStats("else", 1, 0))); + + assertTrue(result.isPresent()); + assertEquals(2, result.get().getCountDistint()); + } + + @Test + void testAllNullConstants() throws UDFArgumentTypeException { + GenericUDFWhen udf = new GenericUDFWhen(); + + ObjectInspector 
boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector constNull = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, null); + + udf.initialize(new ObjectInspector[]{boolOI, constNull, boolOI, constNull, constNull}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate(Arrays.asList( + createColStats("cond1", 2, 0), + createColStats("then1", 1, 0), + createColStats("cond2", 2, 0), + createColStats("then2", 1, 0), + createColStats("else", 1, 0))); + + assertTrue(result.isPresent()); + assertEquals(1, result.get().getCountDistint()); + } + @Test void testConstantBranchesTakesMaxAvgColLen() throws UDFArgumentTypeException { GenericUDFWhen udf = new GenericUDFWhen(); diff --git a/ql/src/test/queries/clientpositive/branching_expr_ndv.q b/ql/src/test/queries/clientpositive/branching_expr_ndv.q new file mode 100644 index 000000000000..30cce6e82986 --- /dev/null +++ b/ql/src/test/queries/clientpositive/branching_expr_ndv.q @@ -0,0 +1,75 @@ +CREATE TABLE t (cond INT, c2 STRING, c100 STRING, c0 STRING); +ALTER TABLE t UPDATE STATISTICS SET('numRows'='10000','rawDataSize'='1000000'); +ALTER TABLE t UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0'); +ALTER TABLE t UPDATE STATISTICS FOR COLUMN c2 SET('numDVs'='2','numNulls'='0','avgColLen'='5','maxColLen'='10'); +ALTER TABLE t UPDATE STATISTICS FOR COLUMN c100 SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10'); +ALTER TABLE t UPDATE STATISTICS FOR COLUMN c0 SET('numDVs'='0','numNulls'='0','avgColLen'='5','maxColLen'='10'); + +-- CASE WHEN: all constants distinct (NDV=3) +EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END; + +-- CASE WHEN: all constants with duplicate (NDV=2) +EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'A' ELSE 'B' END x FROM t 
GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'A' ELSE 'B' END; + +-- CASE WHEN: constants with NULL (NDV=1, NULL literal is not a ConstantObjectInspector) +EXPLAIN SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL ELSE 'A' END x FROM t GROUP BY CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL ELSE 'A' END; + +-- CASE WHEN: all NULL constants (NDV=1) +EXPLAIN SELECT CASE WHEN cond=1 THEN NULL ELSE NULL END x FROM t GROUP BY CASE WHEN cond=1 THEN NULL ELSE NULL END; + +-- CASE WHEN: 3 constants + column, constants dominate (NDV=max(3,2)=3) +EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END; + +-- CASE WHEN: 2 constants + column, column dominates (NDV=max(2,100)=100) +EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END; + +-- CASE WHEN: constant + unknown column (when NDV=0, Hive uses numRows/2 fallback) +EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' ELSE c0 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' ELSE c0 END; + +-- CASE WHEN: all columns, no constants (NDV=max(2,100)=100) +EXPLAIN SELECT CASE WHEN cond=1 THEN c2 ELSE c100 END x FROM t GROUP BY CASE WHEN cond=1 THEN c2 ELSE c100 END; + +-- CASE WHEN: no ELSE clause (NDV=1, implicit NULL ELSE is not a ConstantObjectInspector) +EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' END; + +-- IF: both constants distinct (NDV=2) +EXPLAIN SELECT IF(cond>5, 'A', 'B') x FROM t GROUP BY IF(cond>5, 'A', 'B'); + +-- IF: both constants same (NDV=1) +EXPLAIN SELECT IF(cond>5, 'A', 'A') x FROM t GROUP BY IF(cond>5, 'A', 'A'); + +-- IF: one NULL one constant (NDV=1, NULL literal is not a ConstantObjectInspector) +EXPLAIN SELECT IF(cond>5, NULL, 'A') x FROM t GROUP BY IF(cond>5, NULL, 'A'); + +-- IF: 
both NULL (NDV=1) +EXPLAIN SELECT IF(cond>5, NULL, NULL) x FROM t GROUP BY IF(cond>5, NULL, NULL); + +-- IF: constant + column (NDV=max(1,100)=100) +EXPLAIN SELECT IF(cond>5, 'A', c100) x FROM t GROUP BY IF(cond>5, 'A', c100); + +-- IF: both columns (NDV=max(2,100)=100) +EXPLAIN SELECT IF(cond>5, c2, c100) x FROM t GROUP BY IF(cond>5, c2, c100); + +-- IF: constant + unknown column (when NDV=0, Hive uses numRows/2 fallback) +EXPLAIN SELECT IF(cond>5, 'A', c0) x FROM t GROUP BY IF(cond>5, 'A', c0); + +-- COALESCE: all constants (NDV=1, constant-folded to first non-null 'A') +EXPLAIN SELECT COALESCE('A', 'B', 'C') x FROM t GROUP BY COALESCE('A', 'B', 'C'); + +-- COALESCE: constants with duplicate (NDV=1, constant-folded to 'A') +EXPLAIN SELECT COALESCE('A', 'A', 'B') x FROM t GROUP BY COALESCE('A', 'A', 'B'); + +-- COALESCE: column + constants, column dominates (NDV=max(2,100)=100) +EXPLAIN SELECT COALESCE(c100, 'A', 'B') x FROM t GROUP BY COALESCE(c100, 'A', 'B'); + +-- COALESCE: column + constants, rewritten to IF (NDV=max(1,2)=2) +EXPLAIN SELECT COALESCE(c2, 'A', 'B', 'C') x FROM t GROUP BY COALESCE(c2, 'A', 'B', 'C'); + +-- COALESCE: all columns (NDV=max(2,100)=100) +EXPLAIN SELECT COALESCE(c2, c100) x FROM t GROUP BY COALESCE(c2, c100); + +-- COALESCE: unknown column + constant (when NDV=0, Hive uses numRows/2 fallback) +EXPLAIN SELECT COALESCE(c0, 'A') x FROM t GROUP BY COALESCE(c0, 'A'); + +-- COALESCE: NULL first arg simplified away, then rewritten to IF (NDV=max(1,100)=100) +EXPLAIN SELECT COALESCE(NULL, c100, 'A') x FROM t GROUP BY COALESCE(NULL, c100, 'A'); diff --git a/ql/src/test/results/clientpositive/llap/branching_expr_ndv.q.out b/ql/src/test/results/clientpositive/llap/branching_expr_ndv.q.out new file mode 100644 index 000000000000..e9ef4cb292b9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/branching_expr_ndv.q.out @@ -0,0 +1,1553 @@ +PREHOOK: query: CREATE TABLE t (cond INT, c2 STRING, c100 STRING, c0 STRING) +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: CREATE TABLE t (cond INT, c2 STRING, c100 STRING, c0 STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE t UPDATE STATISTICS SET('numRows'='10000','rawDataSize'='1000000') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS SET('numRows'='10000','rawDataSize'='1000000') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN cond SET('numDVs'='10','numNulls'='0') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c2 SET('numDVs'='2','numNulls'='0','avgColLen'='5','maxColLen'='10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c2 SET('numDVs'='2','numNulls'='0','avgColLen'='5','maxColLen'='10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c100 SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c100 SET('numDVs'='100','numNulls'='0','avgColLen'='5','maxColLen'='10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: ALTER TABLE 
t UPDATE STATISTICS FOR COLUMN c0 SET('numDVs'='0','numNulls'='0','avgColLen'='5','maxColLen'='10') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@t +PREHOOK: Output: default@t +POSTHOOK: query: ALTER TABLE t UPDATE STATISTICS FOR COLUMN c0 SET('numDVs'='0','numNulls'='0','avgColLen'='5','maxColLen'='10') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@t +POSTHOOK: Output: default@t +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE 'C' END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') ELSE ('C') END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 255 Basic 
stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'A' ELSE 'B' END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'A' ELSE 'B' END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'A' ELSE 'B' END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'A' ELSE 'B' END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cond) IN (1, 2), 'A', 'B') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 
+ Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL ELSE 'A' END x FROM t GROUP BY CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL ELSE 'A' END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL ELSE 'A' END x FROM t GROUP BY CASE WHEN cond=1 THEN NULL WHEN cond=2 THEN NULL ELSE 'A' END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: if((cond) IN (1, 2), null, 'A') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN NULL ELSE NULL END x FROM t GROUP BY CASE WHEN cond=1 THEN NULL ELSE NULL END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN NULL ELSE NULL END x FROM t GROUP BY CASE WHEN cond=1 THEN NULL ELSE NULL END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) 
+#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: true (type: boolean) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' 
WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' WHEN cond=3 THEN 'C' ELSE c2 END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') WHEN ((cond = 3)) THEN ('C') ELSE (c2) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' ELSE c100 END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') ELSE (c100) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 
100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' ELSE c0 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' ELSE c0 END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' ELSE c0 END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' ELSE c0 END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cond = 1), 'A', c0) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5000 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5000 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + 
LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2500 Data size: 222500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2500 Data size: 222500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN c2 ELSE c100 END x FROM t GROUP BY CASE WHEN cond=1 THEN c2 ELSE c100 END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN c2 ELSE c100 END x FROM t GROUP BY CASE WHEN cond=1 THEN c2 ELSE c100 END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cond = 1), c2, c100) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: 
string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' END +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' END x FROM t GROUP BY CASE WHEN cond=1 THEN 'A' WHEN cond=2 THEN 'B' END +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: CASE WHEN ((cond = 1)) THEN ('A') WHEN ((cond = 2)) THEN ('B') ELSE (null) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 
40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT IF(cond>5, 'A', 'B') x FROM t GROUP BY IF(cond>5, 'A', 'B') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT IF(cond>5, 'A', 'B') x FROM t GROUP BY IF(cond>5, 'A', 'B') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: if((cond > 5), 'A', 'B') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT IF(cond>5, 'A', 'A') x FROM t GROUP BY IF(cond>5, 'A', 'A') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT IF(cond>5, 'A', 'A') x FROM t GROUP BY IF(cond>5, 'A', 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + 
TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: true (type: boolean) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'A' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT IF(cond>5, NULL, 'A') x FROM t GROUP BY IF(cond>5, NULL, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT IF(cond>5, NULL, 'A') x FROM t GROUP BY IF(cond>5, NULL, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cond > 5), null, 'A') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 40000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT IF(cond>5, NULL, NULL) x FROM t GROUP BY IF(cond>5, NULL, NULL) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT IF(cond>5, NULL, NULL) x FROM t GROUP BY IF(cond>5, NULL, NULL) +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: true (type: boolean) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: null (type: void) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT IF(cond>5, 'A', c100) x FROM t GROUP BY IF(cond>5, 'A', 
c100) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT IF(cond>5, 'A', c100) x FROM t GROUP BY IF(cond>5, 'A', c100) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cond > 5), 'A', c100) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT IF(cond>5, c2, c100) x FROM t GROUP BY IF(cond>5, c2, c100) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT IF(cond>5, c2, c100) x FROM t GROUP BY IF(cond>5, c2, c100) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cond > 5), c2, c100) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 1820000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT IF(cond>5, 'A', c0) x FROM t GROUP BY IF(cond>5, 'A', c0) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT IF(cond>5, 'A', c0) x FROM t GROUP BY IF(cond>5, 'A', c0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if((cond > 5), 'A', c0) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 930000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5000 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5000 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2500 Data size: 222500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2500 
Data size: 222500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT COALESCE('A', 'B', 'C') x FROM t GROUP BY COALESCE('A', 'B', 'C') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT COALESCE('A', 'B', 'C') x FROM t GROUP BY COALESCE('A', 'B', 'C') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: true (type: boolean) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: 'A' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT COALESCE('A', 'A', 'B') x FROM t GROUP BY COALESCE('A', 'A', 'B') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT COALESCE('A', 'A', 'B') x FROM t GROUP BY COALESCE('A', 'A', 'B') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 10000 Data size: 1000000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: true (type: boolean) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap 
+ Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'A' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT COALESCE(c100, 'A', 'B') x FROM t GROUP BY COALESCE(c100, 'A', 'B') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT COALESCE(c100, 'A', 'B') x FROM t GROUP BY COALESCE(c100, 'A', 'B') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if(c100 is not null, c100, 'A') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + 
key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT COALESCE(c2, 'A', 'B', 'C') x FROM t GROUP BY COALESCE(c2, 'A', 'B', 'C') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT COALESCE(c2, 'A', 'B', 'C') x FROM t GROUP BY COALESCE(c2, 'A', 'B', 'C') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if(c2 is not null, c2, 'A') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + 
minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT COALESCE(c2, c100) x FROM t GROUP BY COALESCE(c2, c100) +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT COALESCE(c2, c100) x FROM t GROUP BY COALESCE(c2, c100) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 1780000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if(c2 is not null, c2, c100) (type: string) + outputColumnNames: _col0 + Statistics: Num 
rows: 10000 Data size: 1780000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT COALESCE(c0, 'A') x FROM t GROUP BY COALESCE(c0, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT COALESCE(c0, 'A') x FROM t GROUP BY COALESCE(c0, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 890000 Basic stats: COMPLETE Column 
stats: COMPLETE + Select Operator + expressions: if(c0 is not null, c0, 'A') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5000 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5000 Data size: 445000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 2500 Data size: 222500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2500 Data size: 222500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT COALESCE(NULL, c100, 'A') x FROM t GROUP BY COALESCE(NULL, c100, 'A') +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN SELECT COALESCE(NULL, c100, 'A') x FROM t GROUP BY COALESCE(NULL, c100, 'A') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) 
+#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 10000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: if(c100 is not null, c100, 'A') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 100 Data size: 8900 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + From bdc395f16e4a0bd70be818523346638fc6890d28 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Thu, 5 Feb 2026 16:23:00 -0800 Subject: [PATCH 12/14] HIVE-29368: a misc tweak for empty tables + .out changes --- .../annotation/StatsRulesProcFactory.java | 5 + .../clientpositive/llap/auto_join32.q.out | 14 +- .../clientpositive/llap/cbo_const.q.out | 10 +- .../clientpositive/llap/constant_prop_3.q.out | 
28 +- .../clientpositive/llap/deleteAnalyze.q.out | 4 +- .../llap/infer_bucket_sort_dyn_part.q.out | 10 +- .../clientpositive/llap/innerjoin1.q.out | 12 +- .../llap/list_bucket_dml_6.q.out | 20 +- .../llap/list_bucket_dml_7.q.out | 20 +- .../llap/list_bucket_dml_8.q.out | 10 +- .../clientpositive/llap/load_dyn_part14.q.out | 24 +- .../llap/materialized_view_rewrite_5.q.out | 12 +- .../llap/merge_dynamic_partition4.q.out | 10 +- .../llap/merge_dynamic_partition5.q.out | 10 +- .../llap/pessimistic_stat_combiner_ndv.q.out | 16 +- .../llap/vector_binary_join_groupby.q.out | 14 +- .../llap/vector_full_outer_join_date.q.out | 8 +- .../llap/vector_identity_reuse.q.out | 22 +- .../perf/tpcds30tb/json/query1.q.out | 6 +- .../perf/tpcds30tb/json/query10.q.out | 2 +- .../perf/tpcds30tb/json/query11.q.out | 2 +- .../perf/tpcds30tb/json/query12.q.out | 744 ++++---- .../perf/tpcds30tb/json/query13.q.out | 2 +- .../perf/tpcds30tb/json/query14.q.out | 24 +- .../perf/tpcds30tb/json/query15.q.out | 2 +- .../perf/tpcds30tb/json/query16.q.out | 1532 ++++++++--------- .../perf/tpcds30tb/json/query17.q.out | 14 +- .../perf/tpcds30tb/json/query18.q.out | 6 +- .../perf/tpcds30tb/json/query19.q.out | 10 +- .../perf/tpcds30tb/json/query2.q.out | 4 +- .../perf/tpcds30tb/json/query20.q.out | 738 ++++---- .../perf/tpcds30tb/json/query3.q.out | 6 +- .../perf/tpcds30tb/json/query4.q.out | 2 +- .../perf/tpcds30tb/json/query5.q.out | 1300 +++++++------- .../perf/tpcds30tb/json/query6.q.out | 12 +- .../perf/tpcds30tb/json/query7.q.out | 6 +- .../perf/tpcds30tb/json/query8.q.out | 6 +- .../tez/flatten_union_subdir.q.out | 18 +- 38 files changed, 2345 insertions(+), 2340 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java index 17932ca608f3..01d6d55901db 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java @@ -3250,6 +3250,11 @@ static boolean satisfyPrecondition(Statistics stats) { } static boolean satisfyPrecondition(Statistics stats, List joinKeys) { + // Empty tables have numRows bumped from 0 to 1 (see BasicStats.SetMinRowNumber), + // so numRows <= 1 may indicate an empty table where NDV=0 is legitimate, not "unknown" + if (stats.getNumRows() <= 1) { + return true; + } for (String col : joinKeys) { ColStatistics cs = stats.getColumnStatisticsFromColName(col); if (cs != null && cs.getCountDistint() == 0L) { diff --git a/ql/src/test/results/clientpositive/llap/auto_join32.q.out b/ql/src/test/results/clientpositive/llap/auto_join32.q.out index 3a7e7b404fcb..3f1783db3a03 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join32.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join32.q.out @@ -546,7 +546,7 @@ STAGE PLANS: TableScan alias: s filterExpr: ((p = 'bar') and name is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_34_container, bigKeyColName:name, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_34_container, bigKeyColName:name, smallTablePos:1, keyRatio:1.0 Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((p = 'bar') and name is not null) (type: boolean) @@ -564,19 +564,19 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col2 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: unknown Map 3 @@ -608,16 +608,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/cbo_const.q.out b/ql/src/test/results/clientpositive/llap/cbo_const.q.out index 0b9e4d682011..43b0bf4577ad 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_const.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_const.q.out @@ -298,13 +298,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2310 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 2310 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string) Reducer 3 Execution mode: llap @@ -316,14 +316,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 275 Data size: 48950 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 275 Data size: 48950 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 275 Data size: 48950 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out b/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out index 51932053eab0..412038304bcc 100644 --- a/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out +++ b/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out @@ -317,30 +317,30 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7, _col9 - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col7 (type: 
int), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: bigint), _col5 (type: bigint), _col9 (type: boolean) outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9 - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col6 = 0L) or (_col9 is null and (_col7 >= _col6) and _col1 is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int) outputColumnNames: _col1, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int) null sort order: zzzz sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -348,32 +348,32 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) mode: mergepartial outputColumnNames: 
_col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(_col0) keys: _col1 (type: string), _col2 (type: string), _col3 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) null sort order: azzz sort order: -+++ - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -388,13 
+388,13 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: boolean) Reducer 9 Execution mode: vectorized, llap diff --git a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out index 5068a86de524..6da801115ace 100644 --- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out @@ -173,9 +173,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=1 width=228) + Select Operator [SEL_9] (rows=1 width=96) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_25] (rows=1 width=228) + Merge Join Operator [MERGEJOIN_25] (rows=1 width=96) Conds:RS_28._col1=RS_31._col0(Inner),Output:["_col0","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_28] diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out index b07fc4ca6103..17db16415c01 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_dyn_part.q.out @@ -492,13 +492,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 158 Data size: 103016 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 158 Data size: 103016 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Reducer 3 Execution mode: vectorized, llap @@ -508,14 +508,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 79 Data size: 40764 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 79 Data size: 56248 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 79 
Data size: 56248 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/innerjoin1.q.out b/ql/src/test/results/clientpositive/llap/innerjoin1.q.out index 075e9e8985ba..8e9dbf9b583d 100644 --- a/ql/src/test/results/clientpositive/llap/innerjoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/innerjoin1.q.out @@ -142,14 +142,14 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -249,14 +249,14 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string) 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 1360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 680 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out index a312142af7c1..4f4a0b3df537 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_6.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: 
mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -461,7 +461,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,7 +469,7 @@ STAGE PLANS: 
numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -564,18 +564,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: 
COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out index 9a960de8085f..6e45676ba107 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_7.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) 
(type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -461,7 +461,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -469,7 +469,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 
(type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -564,18 +564,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out index 979ef4f18835..4e5651cccc53 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out +++ 
b/ql/src/test/results/clientpositive/llap/list_bucket_dml_8.q.out @@ -96,7 +96,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator bucketingVersion: 2 key expressions: _col0 (type: string), _col1 (type: string) @@ -104,7 +104,7 @@ STAGE PLANS: numBuckets: -1 sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) auto parallelism: true @@ -199,18 +199,18 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, 
_col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator bucketingVersion: 2 compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out index e3157b9f9251..268a10996c65 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part14.q.out @@ -115,16 +115,16 @@ STAGE PLANS: Group By Operator aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector_hll(key) keys: value (type: string) - minReductionHashAggr: 0.6666666 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reducer 4 Execution mode: vectorized, llap @@ -134,14 +134,14 @@ 
STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 514 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 257 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col1,0)) (type: bigint), COALESCE(_col2,0) (type: double), (_col3 - _col4) (type: bigint), COALESCE(ndv_compute_bit_vector(_col5),0) (type: bigint), _col5 (type: binary), _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 2 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 351 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 702 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 351 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -171,16 +171,16 @@ STAGE PLANS: Group By Operator aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector_hll(key) keys: value (type: string) - minReductionHashAggr: 0.6666666 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 
(type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Reducer 6 Execution mode: vectorized, llap @@ -207,16 +207,16 @@ STAGE PLANS: Group By Operator aggregations: max(length(key)), avg(COALESCE(length(key),0)), count(1), count(key), compute_bit_vector_hll(key) keys: value (type: string) - minReductionHashAggr: 0.6666666 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: struct), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: binary) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out index 1d5de69e74b5..b98b0853d686 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out @@ -986,20 +986,20 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col3) keys: _col1 (type: varchar(256)) - minReductionHashAggr: 0.4 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: varchar(256)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: varchar(256)) - Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -1009,10 +1009,10 @@ STAGE PLANS: keys: KEY._col0 (type: varchar(256)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out index 13b1ace4f633..85f1ea93c068 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition4.q.out @@ -180,13 +180,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 326000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs @@ -198,14 +198,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 250 Data size: 129000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 178000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out index e27223e6f3d4..ab9805c19485 100644 --- a/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/llap/merge_dynamic_partition5.q.out @@ -156,13 +156,13 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 309 Data size: 201468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 309 Data size: 201468 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1304 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: struct), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: binary), _col7 (type: int), _col8 (type: struct), _col9 (type: bigint), _col10 (type: binary) Execution mode: llap LLAP IO: no inputs @@ -174,14 +174,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 154 Data size: 79464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'STRING' (type: string), UDFToLong(COALESCE(_col2,0)) (type: bigint), COALESCE(_col3,0) (type: double), (_col4 - _col5) (type: bigint), COALESCE(ndv_compute_bit_vector(_col6),0) (type: bigint), _col6 (type: 
binary), 'STRING' (type: string), UDFToLong(COALESCE(_col7,0)) (type: bigint), COALESCE(_col8,0) (type: double), (_col4 - _col9) (type: bigint), COALESCE(ndv_compute_bit_vector(_col10),0) (type: bigint), _col10 (type: binary), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 154 Data size: 109648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 154 Data size: 109648 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2 Data size: 1424 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out b/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out index 2b2c665e8f68..4cb960a93ecd 100644 --- a/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out +++ b/ql/src/test/results/clientpositive/llap/pessimistic_stat_combiner_ndv.q.out @@ -28,14 +28,14 @@ FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub GROUP BY x PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: EXPLAIN SELECT x, COUNT(*) FROM (SELECT IF(cat > 50, 'A', 'B') x FROM t1) sub GROUP BY x POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -105,7 +105,7 @@ FROM ( GROUP BY x PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### 
POSTHOOK: query: EXPLAIN SELECT x, COUNT(*) FROM ( @@ -115,7 +115,7 @@ FROM ( GROUP BY x POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -185,7 +185,7 @@ FROM ( GROUP BY x PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: EXPLAIN SELECT x, COUNT(*) FROM ( @@ -195,7 +195,7 @@ FROM ( GROUP BY x POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -345,7 +345,7 @@ JOIN t2 b ON a.k = b.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 -PREHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### POSTHOOK: query: EXPLAIN SELECT a.k, a.total, a.sample, b.v1 FROM ( @@ -387,7 +387,7 @@ JOIN t2 b ON a.k = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 -POSTHOOK: Output: hdfs://### HDFS PATH ### +#### A masked pattern was here #### STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 41bc14e5e354..c6fa83e9a6e4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -137,7 +137,7 @@ STAGE PLANS: TableScan alias: t1 filterExpr: bin is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:bin, smallTablePos:1, keyRatio:0.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:bin, smallTablePos:1, keyRatio:1.1 
Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -170,7 +170,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 input vertices: 1 Map 3 - Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 37492 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int) outputColumnNames: _col0 @@ -179,7 +179,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [23] selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 23:int - Statistics: Num rows: 10000 Data size: 6819968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 37492 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0) Group By Vectorization: @@ -192,7 +192,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: sort order: @@ -200,7 +200,7 @@ STAGE PLANS: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -278,13 +278,13 @@ STAGE PLANS: projectedOutputColumnNums: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out index d1d49f77854b..b585e0adef4f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_full_outer_join_date.q.out @@ -190,7 +190,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 4 - Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: int) @@ -202,7 +202,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:date, 3:date - Statistics: Num rows: 9 Data size: 1080 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: date), _col3 (type: date) Reducer 3 Execution mode: vectorized, llap @@ -227,13 +227,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 1, 3] - Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 9 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out index 58aa422777d5..273a92b28dec 100644 --- a/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_identity_reuse.q.out @@ -198,14 +198,14 @@ STAGE PLANS: outputColumnNames: _col1, _col3, _col4, _col5 input vertices: 1 Map 3 - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator Filter Vectorization: className: VectorFilterOperator native: true predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1:int, val 5), FilterLongColEqualLongScalar(col 7:int, val 10), FilterLongColEqualLongScalar(col 7:bigint, val 571)(children: col 7:int)) predicate: ((_col1 = 5) or (_col5 = 10) or (UDFToLong(_col5) = 571L)) (type: boolean) - Statistics: Num rows: 2 Data size: 32 
Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col5 (type: int), if(_col3 is not null, _col3, UDFToInteger(_col4)) (type: int) outputColumnNames: _col0, _col1, _col2 @@ -214,7 +214,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 7, 9] selectExpressions: IfExprColumnCondExpr(col 8:boolean, col 5:intcol 6:smallint)(children: IsNotNull(col 5:int) -> 8:boolean, col 5:int, col 6:smallint) -> 9:int - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -233,12 +233,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 4 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: - keys: _col0 (type: int) null sort order: z - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Top N Key Vectorization: className: VectorTopNKeyOperator @@ -251,7 +251,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [5, 9, 7] - Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z @@ -262,7 +262,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
valueColumns: 9:int, 7:int - Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -406,13 +406,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 2 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), 922 (type: int), _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 @@ -421,13 +421,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 1, 2] selectExpressions: ConstantVectorExpression(val 922) -> 3:int - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query1.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query1.q.out index 5f7f0f1b614d..e017a3b4c4c2 100644 --- 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query1.q.out @@ -868,7 +868,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1374,13 +1374,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query10.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query10.q.out index 83e4bc87385b..23b2448972ee 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query10.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query10.q.out @@ -1436,7 +1436,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query11.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query11.q.out index 88ce388517a6..bd93eade32d8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query11.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query11.q.out @@ -666,7 +666,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query12.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query12.q.out index 88242502588a..0c0651b9de01 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query12.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query12.q.out @@ -511,139 +511,164 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "item" + "date_dim" ], - "table:alias": "item", + "table:alias": "date_dim", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + 
"rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "i_rec_start_date" + "name": "d_date" }, { - "type": "DATE", + "type": "INTEGER", "nullable": true, - "name": "i_rec_end_date" + "name": "d_month_seq" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 200, - "name": "i_item_desc" + "name": "d_week_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" + "name": "d_quarter_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" + "name": "d_year" }, { "type": "INTEGER", "nullable": true, - "name": "i_brand_id" + "name": "d_dow" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_brand" + "name": "d_moy" }, { "type": "INTEGER", "nullable": true, - "name": "i_class_id" + "name": "d_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_class" + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "d_fy_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_category" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "i_manufact_id" + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_manufact" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_size" + "precision": 6, + "name": "d_quarter_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", 
"nullable": true, - "precision": 20, - "name": "i_color" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "i_container" + "name": "d_first_dom" }, { "type": "INTEGER", "nullable": true, - "name": "i_manager_id" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_product_name" + "precision": 1, + "name": "d_current_day" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_week" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_month" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_quarter" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -687,195 +712,221 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 }, { - "name": "i_item_id", - "ndv": 247524 + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 }, { - "name": "i_item_desc", - "ndv": 341846 + "name": "d_date_id", + "ndv": 71022 }, { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 }, { - "name": "i_class", - "ndv": 99 + "name": "d_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_category", - "ndv": 11 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - 
"maxValue": 11622 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + "maxValue": 12 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 }, { - "name": "i_brand", - "ndv": 742 + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "i_class_id", - "ndv": 16, - "minValue": 1, - "maxValue": 16 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_category_id", - "ndv": 10, + "name": "d_fy_quarter_seq", + "ndv": 808, "minValue": 1, - "maxValue": 10 + "maxValue": 801 }, { - "name": "i_manufact_id", - "ndv": 987, + "name": "d_fy_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 1000 + "maxValue": 10436 }, { - "name": "i_manufact", - "ndv": 1004 + "name": "d_day_name", + "ndv": 7 }, { - "name": "i_size", - "ndv": 8 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "d_holiday", + "ndv": 2 }, { - "name": "i_color", - "ndv": 95 + "name": "d_weekend", + "ndv": 2 }, { - "name": "i_units", - "ndv": 21 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "i_container", - "ndv": 2 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "i_manager_id", - "ndv": 104, - "minValue": 1, - "maxValue": 100 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "i_product_name", - "ndv": 461487 - } - ] - }, - { + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 + }, + { + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, 
+ "maxValue": 2487978 + }, + { + "name": "d_current_day", + "ndv": 1 + }, + { + "name": "d_current_week", + "ndv": 1 + }, + { + "name": "d_current_month", + "ndv": 2 + }, + { + "name": "d_current_quarter", + "ndv": 2 + }, + { + "name": "d_current_year", + "ndv": 2 + } + ] + }, + { "id": "4", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "IN", - "kind": "OTHER_FUNCTION", + "name": "BETWEEN", + "kind": "BETWEEN", "syntax": "SPECIAL" }, "operands": [ { - "input": 12, - "name": "$12" + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "literal": "Books", + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "CHAR", - "nullable": false, - "precision": 5 + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 } }, { - "literal": "Jewelry", + "literal": 979257600000, "type": { - "type": "CHAR", + "type": "TIMESTAMP", "nullable": false, - "precision": 7 + "precision": 9 } }, { - "literal": "Sports", + "literal": 981849600000, "type": { - "type": "CHAR", + "type": "TIMESTAMP", "nullable": false, - "precision": 6 + "precision": 9 } } ] }, - "rowCount": 115500 + "rowCount": 18262.25 }, { "id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk", - "i_item_id", - "i_item_desc", - "i_current_price", - "i_class", - "i_category" + "d_date_sk" ], "exprs": [ { "input": 0, "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, - { - "input": 4, - "name": "$4" - }, - { - "input": 5, - "name": "$5" - }, - { - "input": 10, - "name": "$10" - }, - { - "input": 12, - "name": "$12" } ], - "rowCount": 115500 + "rowCount": 18262.25 }, { "id": "6", @@ -888,8 +939,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 2, + "name": "$2" }, { "input": 3, @@ -904,171 +955,146 @@ "2", "5" ], - "rowCount": 336714399969255 + 
"rowCount": 5.323950260466258E13 }, { "id": "7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "item" ], - "table:alias": "date_dim", + "table:alias": "item", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 462000, + "avgRowSize": 1033, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "i_item_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "i_item_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_month_seq" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_week_seq" + "name": "i_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_quarter_seq" + "name": "i_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_year" + "precision": 200, + "name": "i_item_desc" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_dow" + "precision": 7, + "scale": 2, + "name": "i_current_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_moy" + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" }, { "type": "INTEGER", "nullable": true, - "name": "d_dom" + "name": "i_brand_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_qoy" + "precision": 50, + "name": "i_brand" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_year" + "name": "i_class_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 50, + "name": "i_class" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "i_category" }, { - "type": "CHAR", + "type": "INTEGER", 
"nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "i_manufact_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "i_manufact" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 20, + "name": "i_size" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_first_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_last_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_ly" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_lq" + "precision": 20, + "name": "i_formulation" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 20, + "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "i_units" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 10, + "name": "i_container" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "name": "i_manager_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 50, + "name": "i_product_name" }, { "type": "BIGINT", @@ -1112,150 +1138,112 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 71022 - }, - { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 - }, - { - "name": "d_week_seq", - "ndv": 11297, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 10436 + "maxValue": 462000 }, { - "name": "d_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + 
"name": "i_item_id", + "ndv": 247524 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "i_current_price", + "ndv": 9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "i_class", + "ndv": 99 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "i_category", + "ndv": 11 }, { - "name": "d_qoy", + "name": "i_rec_start_date", "ndv": 4, - "minValue": 1, - "maxValue": 4 - }, - { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 - }, - { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "minValue": 10161, + "maxValue": 11622 }, { - "name": "d_fy_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "d_day_name", - "ndv": 7 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 }, { - "name": "d_holiday", - "ndv": 2 + "name": "i_brand", + "ndv": 742 }, { - "name": "d_weekend", - "ndv": 2 + "name": "i_class_id", + "ndv": 16, + "minValue": 1, + "maxValue": 16 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "i_manufact_id", + "ndv": 987, + "minValue": 1, + "maxValue": 1000 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "i_manufact", + "ndv": 1004 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "i_size", + "ndv": 8 }, { - "name": 
"d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "i_formulation", + "ndv": 344236 }, { - "name": "d_current_day", - "ndv": 1 + "name": "i_color", + "ndv": 95 }, { - "name": "d_current_week", - "ndv": 1 + "name": "i_units", + "ndv": 21 }, { - "name": "d_current_month", + "name": "i_container", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, + "maxValue": 100 }, { - "name": "d_current_year", - "ndv": 2 + "name": "i_product_name", + "ndv": 461487 } ] }, @@ -1264,69 +1252,81 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", + "name": "IN", + "kind": "OTHER_FUNCTION", "syntax": "SPECIAL" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } + "input": 12, + "name": "$12" }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], + "literal": "Books", "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 + "type": "CHAR", + "nullable": false, + "precision": 5 } }, { - "literal": 979257600000, + "literal": "Jewelry", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 7 } }, { - "literal": 981849600000, + "literal": "Sports", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 6 } } ] }, - "rowCount": 18262.25 + "rowCount": 115500 }, { "id": "9", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "i_item_sk", + "i_item_id", + "i_item_desc", + "i_current_price", + "i_class", + "i_category" ], "exprs": [ { "input": 0, "name": "$0" + }, + { + "input": 1, + "name": "$1" + }, + { + "input": 4, + "name": "$4" + }, + { + "input": 5, + "name": "$5" + }, + { + "input": 10, + "name": "$10" + }, + 
{ + "input": 12, + "name": "$12" } ], - "rowCount": 18262.25 + "rowCount": 115500 }, { "id": "10", @@ -1339,12 +1339,12 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 0, + "name": "$0" }, { - "input": 9, - "name": "$9" + "input": 4, + "name": "$4" } ] }, @@ -1355,17 +1355,17 @@ "6", "9" ], - "rowCount": 922374382625779072 + "rowCount": 922374382625779200 }, { "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ - 4, 5, 6, 7, - 8 + 8, + 9 ], "aggs": [ { @@ -1387,7 +1387,7 @@ "name": null } ], - "rowCount": 92237438262577904 + "rowCount": 92237438262577920 }, { "id": "12", @@ -1502,7 +1502,7 @@ "name": "$0" } ], - "rowCount": 92237438262577904 + "rowCount": 92237438262577920 }, { "id": "13", diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query13.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query13.q.out index eb82e309be7d..6d1952d92d18 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query13.q.out @@ -943,7 +943,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query14.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query14.q.out index abb39f2dfa24..c211d0f88132 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query14.q.out @@ -617,7 +617,7 @@ Warning: Map Join MAPJOIN[1187][bigTable=?] in task 'Reducer 26' is a cross prod }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1090,13 +1090,13 @@ Warning: Map Join MAPJOIN[1187][bigTable=?] 
in task 'Reducer 26' is a cross prod }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -1848,7 +1848,7 @@ Warning: Map Join MAPJOIN[1187][bigTable=?] in task 'Reducer 26' is a cross prod }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2270,13 +2270,13 @@ Warning: Map Join MAPJOIN[1187][bigTable=?] in task 'Reducer 26' is a cross prod }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -3239,7 +3239,7 @@ Warning: Map Join MAPJOIN[1187][bigTable=?] in task 'Reducer 26' is a cross prod }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -3661,13 +3661,13 @@ Warning: Map Join MAPJOIN[1187][bigTable=?] in task 'Reducer 26' is a cross prod }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -4630,7 +4630,7 @@ Warning: Map Join MAPJOIN[1187][bigTable=?] in task 'Reducer 26' is a cross prod }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -5052,13 +5052,13 @@ Warning: Map Join MAPJOIN[1187][bigTable=?] 
in task 'Reducer 26' is a cross prod }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query15.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query15.q.out index fa0180d717cb..dc20a8819e17 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query15.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query15.q.out @@ -1456,7 +1456,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query16.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query16.q.out index ad8270f8183c..1bac225618b7 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query16.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query16.q.out @@ -4,6 +4,259 @@ { "id": "0", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", + "table": [ + "default", + "customer_address" + ], + "table:alias": "customer_address", + "inputs": [], + "rowCount": 40000000, + "avgRowSize": 607, + "rowType": { + "fields": [ + { + "type": "BIGINT", + "nullable": false, + "name": "ca_address_sk" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 16, + "name": "ca_address_id" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_street_number" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 60, + "name": "ca_street_name" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 15, + "name": "ca_street_type" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_suite_number" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 60, + "name": "ca_city" + }, + { + "type": "VARCHAR", + "nullable": true, 
+ "precision": 30, + "name": "ca_county" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 2, + "name": "ca_state" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_zip" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 20, + "name": "ca_country" + }, + { + "type": "DECIMAL", + "nullable": true, + "precision": 5, + "scale": 2, + "name": "ca_gmt_offset" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 20, + "name": "ca_location_type" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "BLOCK__OFFSET__INSIDE__FILE" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 2147483647, + "name": "INPUT__FILE__NAME" + }, + { + "fields": [ + { + "type": "BIGINT", + "nullable": true, + "name": "writeid" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "bucketid" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "rowid" + } + ], + "nullable": true, + "name": "ROW__ID" + }, + { + "type": "BOOLEAN", + "nullable": true, + "name": "ROW__IS__DELETED" + } + ], + "nullable": false + }, + "colStats": [ + { + "name": "ca_address_sk", + "ndv": 40618307, + "minValue": 1, + "maxValue": 40000000 + }, + { + "name": "ca_state", + "ndv": 53 + }, + { + "name": "ca_address_id", + "ndv": 39667899 + }, + { + "name": "ca_street_number", + "ndv": 1014 + }, + { + "name": "ca_street_name", + "ndv": 8358 + }, + { + "name": "ca_street_type", + "ndv": 21 + }, + { + "name": "ca_suite_number", + "ndv": 76 + }, + { + "name": "ca_city", + "ndv": 985 + }, + { + "name": "ca_county", + "ndv": 1930 + }, + { + "name": "ca_zip", + "ndv": 9538 + }, + { + "name": "ca_country", + "ndv": 2 + }, + { + "name": "ca_gmt_offset", + "ndv": 6, + "minValue": -10, + "maxValue": -5 + }, + { + "name": "ca_location_type", + "ndv": 4 + } + ] + }, + { + "id": "1", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": 
"BINARY" + }, + "operands": [ + { + "input": 8, + "name": "$8" + }, + { + "literal": "NY", + "type": { + "type": "CHAR", + "nullable": false, + "precision": 2 + } + } + ] + }, + "rowCount": 6000000 + }, + { + "id": "2", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", + "fields": [ + "ca_address_sk", + "ca_state" + ], + "exprs": [ + { + "input": 0, + "name": "$0" + }, + { + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": "NY", + "type": { + "type": "CHAR", + "nullable": false, + "precision": 2 + } + } + ], + "type": { + "type": "CHAR", + "nullable": true, + "precision": 2 + } + } + ], + "rowCount": 6000000 + }, + { + "id": "3", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", "catalog_sales" @@ -465,7 +718,7 @@ ] }, { - "id": "1", + "id": "4", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -518,7 +771,7 @@ "rowCount": 3.1508010502623005E10 }, { - "id": "2", + "id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "cs_ship_date_sk", @@ -562,95 +815,168 @@ "rowCount": 3.1508010502623005E10 }, { - "id": "3", + "id": "6", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "customer_address" + "date_dim" ], - "table:alias": "customer_address", + "table:alias": "date_dim", "inputs": [], - "rowCount": 40000000, - "avgRowSize": 607, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "ca_address_sk" + "name": "d_date_sk" + }, + { + "type": "VARCHAR", + "nullable": false, + "precision": 2147483647, + "name": "d_date_id" + }, + { + "type": "DATE", + "nullable": true, + "name": "d_date" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_month_seq" + }, + { + "type": "INTEGER", + "nullable": 
true, + "name": "d_week_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_quarter_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_year" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_dow" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_moy" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_qoy" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_year" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_quarter_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 16, - "name": "ca_address_id" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_street_number" + "precision": 6, + "name": "d_quarter_name" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 60, - "name": "ca_street_name" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 15, - "name": "ca_street_type" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_suite_number" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 60, - "name": "ca_city" + "name": "d_first_dom" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 30, - "name": "ca_county" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 2, - "name": "ca_state" + "precision": 1, + "name": "d_current_day" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_zip" + "precision": 1, + "name": "d_current_week" }, { - 
"type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 20, - "name": "ca_country" + "precision": 1, + "name": "d_current_month" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 5, - "scale": 2, - "name": "ca_gmt_offset" + "precision": 1, + "name": "d_current_quarter" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "ca_location_type" + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -694,601 +1020,214 @@ }, "colStats": [ { - "name": "ca_address_sk", - "ndv": 40618307, + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 + }, + { + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 + }, + { + "name": "d_date_id", + "ndv": 71022 + }, + { + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 + }, + { + "name": "d_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 40000000 + "maxValue": 10436 }, { - "name": "ca_state", - "ndv": 53 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "ca_address_id", - "ndv": 39667899 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "ca_street_number", - "ndv": 1014 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "ca_street_name", - "ndv": 8358 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + "maxValue": 12 }, { - "name": "ca_street_type", - "ndv": 21 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 }, { - "name": "ca_suite_number", - "ndv": 76 + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "ca_city", - "ndv": 985 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "ca_county", - "ndv": 1930 + "name": "d_fy_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "ca_zip", - "ndv": 9538 + "name": "d_fy_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 
10436 }, { - "name": "ca_country", - "ndv": 2 + "name": "d_day_name", + "ndv": 7 }, { - "name": "ca_gmt_offset", - "ndv": 6, - "minValue": -10, - "maxValue": -5 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "ca_location_type", - "ndv": 4 - } - ] - }, - { - "id": "4", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" + "name": "d_holiday", + "ndv": 2 }, - "operands": [ - { - "input": 8, - "name": "$8" - }, - { - "literal": "NY", - "type": { - "type": "CHAR", - "nullable": false, - "precision": 2 - } - } - ] - }, - "rowCount": 6000000 - }, - { - "id": "5", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", - "fields": [ - "ca_address_sk", - "ca_state" - ], - "exprs": [ { - "input": 0, - "name": "$0" - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "literal": "NY", - "type": { - "type": "CHAR", - "nullable": false, - "precision": 2 - } - } - ], - "type": { - "type": "CHAR", - "nullable": true, - "precision": 2 - } - } - ], - "rowCount": 6000000 - }, - { - "id": "6", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 1, - "name": "$1" - }, - { - "input": 7, - "name": "$7" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "2", - "5" - ], - "rowCount": 28357209452360700 - }, - { - "id": "7", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", - "table": [ - "default", - "call_center" - ], - "table:alias": "call_center", - "inputs": [], - "rowCount": 60, - "avgRowSize": 1483, - "rowType": { - "fields": [ - { - "type": "BIGINT", - "nullable": false, - "name": "cc_call_center_sk" - }, - { - "type": "VARCHAR", - "nullable": false, - 
"precision": 2147483647, - "name": "cc_call_center_id" - }, - { - "type": "DATE", - "nullable": true, - "name": "cc_rec_start_date" - }, - { - "type": "DATE", - "nullable": true, - "name": "cc_rec_end_date" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "cc_closed_date_sk" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "cc_open_date_sk" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "cc_name" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "cc_class" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "cc_employees" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "cc_sq_ft" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 20, - "name": "cc_hours" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 40, - "name": "cc_manager" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "cc_mkt_id" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 50, - "name": "cc_mkt_class" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 100, - "name": "cc_mkt_desc" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 40, - "name": "cc_market_manager" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "cc_division" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "cc_division_name" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "cc_company" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 50, - "name": "cc_company_name" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "cc_street_number" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 60, - "name": "cc_street_name" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 15, - "name": "cc_street_type" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "cc_suite_number" - }, - { - "type": "VARCHAR", - "nullable": true, - 
"precision": 60, - "name": "cc_city" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 30, - "name": "cc_county" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 2, - "name": "cc_state" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "cc_zip" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 20, - "name": "cc_country" - }, - { - "type": "DECIMAL", - "nullable": true, - "precision": 5, - "scale": 2, - "name": "cc_gmt_offset" - }, - { - "type": "DECIMAL", - "nullable": true, - "precision": 5, - "scale": 2, - "name": "cc_tax_percentage" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "BLOCK__OFFSET__INSIDE__FILE" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 2147483647, - "name": "INPUT__FILE__NAME" - }, - { - "fields": [ - { - "type": "BIGINT", - "nullable": true, - "name": "writeid" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "bucketid" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "rowid" - } - ], - "nullable": true, - "name": "ROW__ID" - }, - { - "type": "BOOLEAN", - "nullable": true, - "name": "ROW__IS__DELETED" - } - ], - "nullable": false - }, - "colStats": [ - { - "name": "cc_call_center_sk", - "ndv": 60, - "minValue": 1, - "maxValue": 60 - }, - { - "name": "cc_county", - "ndv": 25 - }, - { - "name": "cc_call_center_id", - "ndv": 30 - }, - { - "name": "cc_rec_start_date", - "ndv": 0, - "minValue": 10227, - "maxValue": 11688 - }, - { - "name": "cc_rec_end_date", - "ndv": 0, - "minValue": 10957, - "maxValue": 11687 - }, - { - "name": "cc_closed_date_sk", - "ndv": 1, - "minValue": null, - "maxValue": null - }, - { - "name": "cc_open_date_sk", - "ndv": 30, - "minValue": 2450794, - "maxValue": 2451146 - }, - { - "name": "cc_name", - "ndv": 30 - }, - { - "name": "cc_class", - "ndv": 3 - }, - { - "name": "cc_employees", - "ndv": 43, - "minValue": 5412266, - "maxValue": 1963174023 - }, - { - "name": "cc_sq_ft", - "ndv": 47, - "minValue": 
-2108783316, - "maxValue": 2044891959 - }, - { - "name": "cc_hours", - "ndv": 3 - }, - { - "name": "cc_manager", - "ndv": 42 - }, - { - "name": "cc_mkt_id", - "ndv": 6, - "minValue": 1, - "maxValue": 6 - }, - { - "name": "cc_mkt_class", - "ndv": 52 - }, - { - "name": "cc_mkt_desc", - "ndv": 48 - }, - { - "name": "cc_market_manager", - "ndv": 48 - }, - { - "name": "cc_division", - "ndv": 6, - "minValue": 1, - "maxValue": 6 - }, - { - "name": "cc_division_name", - "ndv": 6 - }, - { - "name": "cc_company", - "ndv": 6, - "minValue": 1, - "maxValue": 6 - }, - { - "name": "cc_company_name", - "ndv": 6 + "name": "d_weekend", + "ndv": 2 }, { - "name": "cc_street_number", - "ndv": 30 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "cc_street_name", - "ndv": 29 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "cc_street_type", - "ndv": 14 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "cc_suite_number", - "ndv": 26 + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 }, { - "name": "cc_city", - "ndv": 25 + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 }, { - "name": "cc_state", - "ndv": 19 + "name": "d_current_day", + "ndv": 1 }, { - "name": "cc_zip", - "ndv": 30 + "name": "d_current_week", + "ndv": 1 }, { - "name": "cc_country", - "ndv": 1 + "name": "d_current_month", + "ndv": 2 }, { - "name": "cc_gmt_offset", - "ndv": 4, - "minValue": -8, - "maxValue": -5 + "name": "d_current_quarter", + "ndv": 2 }, { - "name": "cc_tax_percentage", - "ndv": 13, - "minValue": 0, - "maxValue": 0.12 + "name": "d_current_year", + "ndv": 2 } ] }, { - "id": "8", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "IN", - "kind": "OTHER_FUNCTION", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 25, - "name": "$25" - }, - { - "literal": 
"Daviess County", - "type": { - "type": "VARCHAR", - "nullable": false, - "precision": 30 - } - }, + "id": "7", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ { - "literal": "Franklin Parish", + "literal": false, "type": { - "type": "VARCHAR", - "nullable": false, - "precision": 30 + "type": "BOOLEAN", + "nullable": false } }, { - "literal": "Huron County", + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "VARCHAR", - "nullable": false, - "precision": 30 + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 } }, { - "literal": "Levy County", + "literal": 986083200000, "type": { - "type": "VARCHAR", + "type": "TIMESTAMP", "nullable": false, - "precision": 30 + "precision": 9 } }, { - "literal": "Ziebach County", + "literal": 991267200000, "type": { - "type": "VARCHAR", + "type": "TIMESTAMP", "nullable": false, - "precision": 30 + "precision": 9 } } ] }, - "rowCount": 15 + "rowCount": 18262.25 }, { - "id": "9", + "id": "8", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "cc_call_center_sk", - "cc_county" + "d_date_sk", + "d_date" ], "exprs": [ { @@ -1296,11 +1235,40 @@ "name": "$0" }, { - "input": 25, - "name": "$25" + "input": 2, + "name": "$2" } ], - "rowCount": 15 + "rowCount": 18262.25 + }, + { + "id": "9", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 0, + "name": "$0" + }, + { + "input": 7, + "name": "$7" + } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "5", + "8" + ], + "rowCount": 8.631107472022905E13 }, { "id": "10", @@ -1313,12 +1281,12 @@ }, "operands": [ { 
- "input": 2, - "name": "$2" + "input": 3, + "name": "$3" }, { - "input": 9, - "name": "$9" + "input": 0, + "name": "$0" } ] }, @@ -1326,174 +1294,201 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "6", + "2", "9" ], - "rowCount": 63803721267811576 + "rowCount": 7.767996724820614E19 }, { "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "call_center" ], - "table:alias": "date_dim", + "table:alias": "call_center", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 60, + "avgRowSize": 1483, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "cc_call_center_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "cc_call_center_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" + "name": "cc_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_month_seq" + "name": "cc_rec_end_date" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_week_seq" + "name": "cc_closed_date_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_quarter_seq" + "name": "cc_open_date_sk" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_year" + "precision": 50, + "name": "cc_name" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_dow" + "precision": 50, + "name": "cc_class" }, { "type": "INTEGER", "nullable": true, - "name": "d_moy" + "name": "cc_employees" }, { "type": "INTEGER", "nullable": true, - "name": "d_dom" + "name": "cc_sq_ft" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_qoy" + "precision": 20, + "name": "cc_hours" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_fy_year" + "precision": 40, + "name": "cc_manager" }, { "type": "INTEGER", "nullable": true, - "name": 
"d_fy_quarter_seq" + "name": "cc_mkt_id" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 50, + "name": "cc_mkt_class" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 100, + "name": "cc_mkt_desc" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 40, + "name": "cc_market_manager" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "cc_division" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "cc_division_name" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "cc_company" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "cc_company_name" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 10, + "name": "cc_street_number" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" + "precision": 60, + "name": "cc_street_name" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_first_dom" + "precision": 15, + "name": "cc_street_type" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_last_dom" + "precision": 10, + "name": "cc_suite_number" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_same_day_ly" + "precision": 60, + "name": "cc_city" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_same_day_lq" + "precision": 30, + "name": "cc_county" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 2, + "name": "cc_state" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "cc_zip" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + 
"precision": 20, + "name": "cc_country" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "precision": 5, + "scale": 2, + "name": "cc_gmt_offset" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 5, + "scale": 2, + "name": "cc_tax_percentage" }, { "type": "BIGINT", @@ -1537,150 +1532,152 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 + "name": "cc_call_center_sk", + "ndv": 60, + "minValue": 1, + "maxValue": 60 + }, + { + "name": "cc_county", + "ndv": 25 + }, + { + "name": "cc_call_center_id", + "ndv": 30 + }, + { + "name": "cc_rec_start_date", + "ndv": 4, + "minValue": 10227, + "maxValue": 11688 + }, + { + "name": "cc_rec_end_date", + "ndv": 3, + "minValue": 10957, + "maxValue": 11687 }, { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 + "name": "cc_closed_date_sk", + "ndv": 1, + "minValue": null, + "maxValue": null }, { - "name": "d_date_id", - "ndv": 71022 + "name": "cc_open_date_sk", + "ndv": 30, + "minValue": 2450794, + "maxValue": 2451146 }, { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 + "name": "cc_name", + "ndv": 30 }, { - "name": "d_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "cc_class", + "ndv": 3 }, { - "name": "d_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "cc_employees", + "ndv": 43, + "minValue": 5412266, + "maxValue": 1963174023 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "cc_sq_ft", + "ndv": 47, + "minValue": -2108783316, + "maxValue": 2044891959 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "cc_hours", + "ndv": 3 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "cc_manager", + "ndv": 42 }, { - "name": "d_dom", - "ndv": 31, + "name": 
"cc_mkt_id", + "ndv": 6, "minValue": 1, - "maxValue": 31 + "maxValue": 6 }, { - "name": "d_qoy", - "ndv": 4, - "minValue": 1, - "maxValue": 4 + "name": "cc_mkt_class", + "ndv": 52 }, { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "cc_mkt_desc", + "ndv": 48 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "cc_market_manager", + "ndv": 48 }, { - "name": "d_fy_week_seq", - "ndv": 11297, + "name": "cc_division", + "ndv": 6, "minValue": 1, - "maxValue": 10436 + "maxValue": 6 }, { - "name": "d_day_name", - "ndv": 7 + "name": "cc_division_name", + "ndv": 6 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "cc_company", + "ndv": 6, + "minValue": 1, + "maxValue": 6 }, { - "name": "d_holiday", - "ndv": 2 + "name": "cc_company_name", + "ndv": 6 }, { - "name": "d_weekend", - "ndv": 2 + "name": "cc_street_number", + "ndv": 30 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "cc_street_name", + "ndv": 29 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "cc_street_type", + "ndv": 14 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "cc_suite_number", + "ndv": 26 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "cc_city", + "ndv": 25 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "cc_state", + "ndv": 19 }, { - "name": "d_current_day", - "ndv": 1 + "name": "cc_zip", + "ndv": 30 }, { - "name": "d_current_week", + "name": "cc_country", "ndv": 1 }, { - "name": "d_current_month", - "ndv": 2 - }, - { - "name": "d_current_quarter", - "ndv": 2 + "name": "cc_gmt_offset", + "ndv": 4, + "minValue": -8, + "maxValue": -5 }, { - "name": "d_current_year", - "ndv": 2 + "name": "cc_tax_percentage", + "ndv": 13, + "minValue": 0, + "maxValue": 0.12 } ] }, @@ -1689,62 +1686,65 @@ "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", + "name": "IN", + "kind": "OTHER_FUNCTION", "syntax": "SPECIAL" }, "operands": [ { - "literal": false, + "input": 25, + "name": "$25" + }, + { + "literal": "Daviess County", "type": { - "type": "BOOLEAN", - "nullable": false + "type": "VARCHAR", + "nullable": false, + "precision": 30 } }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], + "literal": "Franklin Parish", "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 + "type": "VARCHAR", + "nullable": false, + "precision": 30 } }, { - "literal": 986083200000, + "literal": "Huron County", "type": { - "type": "TIMESTAMP", + "type": "VARCHAR", "nullable": false, - "precision": 9 + "precision": 30 } }, { - "literal": 991267200000, + "literal": "Levy County", "type": { - "type": "TIMESTAMP", + "type": "VARCHAR", "nullable": false, - "precision": 9 + "precision": 30 + } + }, + { + "literal": "Ziebach County", + "type": { + "type": "VARCHAR", + "nullable": false, + "precision": 30 } } ] }, - "rowCount": 18262.25 + "rowCount": 15 }, { "id": "13", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk", - "d_date" + "cc_call_center_sk", + "cc_county" ], "exprs": [ { @@ -1752,11 +1752,11 @@ "name": "$0" }, { - "input": 2, - "name": "$2" + "input": 25, + "name": "$25" } ], - "rowCount": 18262.25 + "rowCount": 15 }, { "id": "14", @@ -1769,8 +1769,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 4, + "name": "$4" }, { "input": 11, @@ -1785,7 +1785,7 @@ "10", "13" ], - "rowCount": 1.7477992630846377E20 + "rowCount": 1.747799263084638E20 }, { "id": "15", @@ -1806,14 +1806,6 @@ "cc_county" ], "exprs": [ - { - "input": 0, - "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, { "input": 2, "name": "$2" @@ -1834,14 +1826,6 @@ 
"input": 6, "name": "$6" }, - { - "input": 11, - "name": "$11" - }, - { - "input": 12, - "name": "$12" - }, { "input": 7, "name": "$7" @@ -1857,9 +1841,25 @@ { "input": 10, "name": "$10" + }, + { + "input": 0, + "name": "$0" + }, + { + "input": 1, + "name": "$1" + }, + { + "input": 11, + "name": "$11" + }, + { + "input": 12, + "name": "$12" } ], - "rowCount": 1.7477992630846377E20 + "rowCount": 1.747799263084638E20 }, { "id": "16", diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query17.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query17.q.out index 9e6abc7cb63a..a9e25b6517a4 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query17.q.out @@ -754,7 +754,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1620,7 +1620,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2413,7 +2413,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -3107,13 +3107,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -3475,13 +3475,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query18.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query18.q.out index 47aa0caee84b..4dcacd5f1801 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query18.q.out @@ -869,7 +869,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, 
@@ -2314,13 +2314,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query19.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query19.q.out index 3b497bc3fb41..e483b55e0a01 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query19.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query19.q.out @@ -1205,7 +1205,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1644,13 +1644,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -2134,13 +2134,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query2.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query2.q.out index f4f5c942b66a..d40e2be2746a 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query2.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query2.q.out @@ -1263,7 +1263,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2208,7 +2208,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query20.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query20.q.out index 31effb0e1e65..13ce90905910 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query20.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query20.q.out @@ -511,139 +511,164 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "item" + "date_dim" ], - "table:alias": "item", + "table:alias": "date_dim", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "i_rec_start_date" + "name": "d_date" }, { - "type": "DATE", + "type": "INTEGER", "nullable": true, - "name": "i_rec_end_date" + "name": "d_month_seq" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 200, - "name": "i_item_desc" + "name": "d_week_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" + "name": "d_quarter_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" + "name": "d_year" }, { "type": "INTEGER", "nullable": true, - "name": "i_brand_id" + "name": "d_dow" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_brand" + "name": "d_moy" }, { "type": "INTEGER", "nullable": true, - "name": "i_class_id" + "name": "d_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_class" + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "d_fy_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_category" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "i_manufact_id" + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - 
"precision": 50, - "name": "i_manufact" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_size" + "precision": 6, + "name": "d_quarter_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_color" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "i_container" + "name": "d_first_dom" }, { "type": "INTEGER", "nullable": true, - "name": "i_manager_id" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_product_name" + "precision": 1, + "name": "d_current_day" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_week" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_month" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_quarter" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -687,195 +712,221 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 }, { - "name": "i_item_id", - "ndv": 247524 + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 }, { - "name": "i_item_desc", - "ndv": 341846 + "name": "d_date_id", + "ndv": 71022 }, { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 + "name": "d_month_seq", + 
"ndv": 2439, + "minValue": 0, + "maxValue": 2400 }, { - "name": "i_class", - "ndv": 99 + "name": "d_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_category", - "ndv": 11 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - "maxValue": 11622 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + "maxValue": 12 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 }, { - "name": "i_brand", - "ndv": 742 + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "i_class_id", - "ndv": 16, - "minValue": 1, - "maxValue": 16 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_category_id", - "ndv": 10, + "name": "d_fy_quarter_seq", + "ndv": 808, "minValue": 1, - "maxValue": 10 + "maxValue": 801 }, { - "name": "i_manufact_id", - "ndv": 987, + "name": "d_fy_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 1000 + "maxValue": 10436 }, { - "name": "i_manufact", - "ndv": 1004 + "name": "d_day_name", + "ndv": 7 }, { - "name": "i_size", - "ndv": 8 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "d_holiday", + "ndv": 2 }, { - "name": "i_color", - "ndv": 95 + "name": "d_weekend", + "ndv": 2 }, { - "name": "i_units", - "ndv": 21 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "i_container", - "ndv": 2 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "i_manager_id", - "ndv": 
104, - "minValue": 1, - "maxValue": 100 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "i_product_name", - "ndv": 461487 - } - ] - }, - { + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 + }, + { + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 + }, + { + "name": "d_current_day", + "ndv": 1 + }, + { + "name": "d_current_week", + "ndv": 1 + }, + { + "name": "d_current_month", + "ndv": 2 + }, + { + "name": "d_current_quarter", + "ndv": 2 + }, + { + "name": "d_current_year", + "ndv": 2 + } + ] + }, + { "id": "4", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "IN", - "kind": "OTHER_FUNCTION", + "name": "BETWEEN", + "kind": "BETWEEN", "syntax": "SPECIAL" }, "operands": [ { - "input": 12, - "name": "$12" + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "literal": "Books", + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "CHAR", - "nullable": false, - "precision": 5 + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 } }, { - "literal": "Jewelry", + "literal": 979257600000, "type": { - "type": "CHAR", + "type": "TIMESTAMP", "nullable": false, - "precision": 7 + "precision": 9 } }, { - "literal": "Sports", + "literal": 981849600000, "type": { - "type": "CHAR", + "type": "TIMESTAMP", "nullable": false, - "precision": 6 + "precision": 9 } } ] }, - "rowCount": 115500 + "rowCount": 18262.25 }, { "id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk", - "i_item_id", - "i_item_desc", - "i_current_price", - "i_class", - "i_category" + "d_date_sk" ], "exprs": [ { "input": 0, "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, - { - "input": 4, - "name": "$4" - }, - { - "input": 
5, - "name": "$5" - }, - { - "input": 10, - "name": "$10" - }, - { - "input": 12, - "name": "$12" } ], - "rowCount": 115500 + "rowCount": 18262.25 }, { "id": "6", @@ -888,8 +939,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 2, + "name": "$2" }, { "input": 3, @@ -904,171 +955,146 @@ "2", "5" ], - "rowCount": 6.705571624723125E14 + "rowCount": 1.0602495705939384E14 }, { "id": "7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "item" ], - "table:alias": "date_dim", + "table:alias": "item", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 462000, + "avgRowSize": 1033, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "i_item_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "i_item_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_month_seq" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_week_seq" + "name": "i_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_quarter_seq" + "name": "i_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_year" + "precision": 200, + "name": "i_item_desc" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_dow" + "precision": 7, + "scale": 2, + "name": "i_current_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_moy" + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" }, { "type": "INTEGER", "nullable": true, - "name": "d_dom" + "name": "i_brand_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_qoy" + "precision": 50, + "name": "i_brand" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_year" + "name": "i_class_id" }, { - "type": "INTEGER", + "type": 
"CHAR", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 50, + "name": "i_class" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "i_category" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "i_manufact_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "i_manufact" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 20, + "name": "i_size" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_first_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_last_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_ly" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_lq" + "precision": 20, + "name": "i_formulation" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 20, + "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "i_units" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 10, + "name": "i_container" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "name": "i_manager_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 50, + "name": "i_product_name" }, { "type": "BIGINT", @@ -1112,150 +1138,112 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - 
"name": "d_date_id", - "ndv": 71022 - }, - { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 - }, - { - "name": "d_week_seq", - "ndv": 11297, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 10436 + "maxValue": 462000 }, { - "name": "d_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "i_item_id", + "ndv": 247524 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "i_current_price", + "ndv": 9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "i_class", + "ndv": 99 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "i_category", + "ndv": 11 }, { - "name": "d_qoy", + "name": "i_rec_start_date", "ndv": 4, - "minValue": 1, - "maxValue": 4 - }, - { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 - }, - { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "minValue": 10161, + "maxValue": 11622 }, { - "name": "d_fy_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "d_day_name", - "ndv": 7 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 }, { - "name": "d_holiday", - "ndv": 2 + "name": "i_brand", + "ndv": 742 }, { - "name": "d_weekend", - "ndv": 2 + "name": "i_class_id", + "ndv": 16, + "minValue": 1, + "maxValue": 16 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + 
"name": "i_manufact_id", + "ndv": 987, + "minValue": 1, + "maxValue": 1000 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "i_manufact", + "ndv": 1004 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "i_size", + "ndv": 8 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "i_formulation", + "ndv": 344236 }, { - "name": "d_current_day", - "ndv": 1 + "name": "i_color", + "ndv": 95 }, { - "name": "d_current_week", - "ndv": 1 + "name": "i_units", + "ndv": 21 }, { - "name": "d_current_month", + "name": "i_container", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, + "maxValue": 100 }, { - "name": "d_current_year", - "ndv": 2 + "name": "i_product_name", + "ndv": 461487 } ] }, @@ -1264,69 +1252,81 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", + "name": "IN", + "kind": "OTHER_FUNCTION", "syntax": "SPECIAL" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } + "input": 12, + "name": "$12" }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], + "literal": "Books", "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 + "type": "CHAR", + "nullable": false, + "precision": 5 } }, { - "literal": 979257600000, + "literal": "Jewelry", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 7 } }, { - "literal": 981849600000, + "literal": "Sports", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 6 } } ] }, - "rowCount": 18262.25 + "rowCount": 115500 }, { "id": "9", "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "i_item_sk", + "i_item_id", + "i_item_desc", + "i_current_price", + "i_class", + "i_category" ], "exprs": [ { "input": 0, "name": "$0" + }, + { + "input": 1, + "name": "$1" + }, + { + "input": 4, + "name": "$4" + }, + { + "input": 5, + "name": "$5" + }, + { + "input": 10, + "name": "$10" + }, + { + "input": 12, + "name": "$12" } ], - "rowCount": 18262.25 + "rowCount": 115500 }, { "id": "10", @@ -1339,12 +1339,12 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 0, + "name": "$0" }, { - "input": 9, - "name": "$9" + "input": 4, + "name": "$4" } ] }, @@ -1361,11 +1361,11 @@ "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ - 4, 5, 6, 7, - 8 + 8, + 9 ], "aggs": [ { diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query3.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query3.q.out index 58d5621c5d8b..7bb718828ba8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query3.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query3.q.out @@ -587,13 +587,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -976,7 +976,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query4.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query4.q.out index 344b86e909c3..453170c2c18b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query4.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query4.q.out @@ -847,7 +847,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query5.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query5.q.out index ad651ccf40c2..58f01d7257b5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query5.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query5.q.out @@ -926,180 +926,164 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "store" + "date_dim" ], - "table:alias": "store", + "table:alias": "date_dim", "inputs": [], - "rowCount": 1704, - "avgRowSize": 1375, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "s_store_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "s_store_id" - }, - { - "type": "DATE", - "nullable": true, - "name": "s_rec_start_date" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "s_rec_end_date" + "name": "d_date" }, { - "type": "BIGINT", + "type": "INTEGER", "nullable": true, - "name": "s_closed_date_sk" + "name": "d_month_seq" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "s_store_name" + "name": "d_week_seq" }, { "type": "INTEGER", "nullable": true, - "name": "s_number_employees" + "name": "d_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "s_floor_space" + "name": "d_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 20, - "name": "s_hours" + "name": "d_dow" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 40, - "name": "s_manager" + "name": "d_moy" }, { "type": "INTEGER", "nullable": true, - "name": "s_market_id" + "name": "d_dom" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 100, - "name": "s_geography_class" + "name": "d_qoy" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 100, 
- "name": "s_market_desc" + "name": "d_fy_year" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 40, - "name": "s_market_manager" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "s_division_id" + "name": "d_fy_week_seq" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 50, - "name": "s_division_name" + "precision": 9, + "name": "d_day_name" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "s_company_id" + "precision": 6, + "name": "d_quarter_name" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 50, - "name": "s_company_name" + "precision": 1, + "name": "d_holiday" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 10, - "name": "s_street_number" + "precision": 1, + "name": "d_weekend" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 60, - "name": "s_street_name" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 15, - "name": "s_street_type" + "name": "d_first_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "s_suite_number" + "name": "d_last_dom" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 60, - "name": "s_city" + "name": "d_same_day_ly" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 30, - "name": "s_county" + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 2, - "name": "s_state" + "precision": 1, + "name": "d_current_day" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "s_zip" + "precision": 1, + "name": "d_current_week" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 20, - "name": "s_country" + "precision": 1, + "name": "d_current_month" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - 
"precision": 5, - "scale": 2, - "name": "s_gmt_offset" + "precision": 1, + "name": "d_current_quarter" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 5, - "scale": 2, - "name": "s_tax_percentage" + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -1143,166 +1127,224 @@ }, "colStats": [ { - "name": "s_store_sk", - "ndv": 1736, - "minValue": 1, - "maxValue": 1704 - }, - { - "name": "s_store_id", - "ndv": 879 + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 }, { - "name": "s_rec_start_date", - "ndv": 0, - "minValue": 9933, - "maxValue": 11394 + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 }, { - "name": "s_rec_end_date", - "ndv": 0, - "minValue": 10663, - "maxValue": 11393 + "name": "d_date_id", + "ndv": 71022 }, { - "name": "s_closed_date_sk", - "ndv": 263, - "minValue": 2450820, - "maxValue": 2451314 + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 }, { - "name": "s_store_name", - "ndv": 11 + "name": "d_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "s_number_employees", - "ndv": 100, - "minValue": 200, - "maxValue": 300 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "s_floor_space", - "ndv": 1289, - "minValue": 5000201, - "maxValue": 9997773 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "s_hours", - "ndv": 4 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "s_manager", - "ndv": 1245 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + "maxValue": 12 }, { - "name": "s_market_id", - "ndv": 10, + "name": "d_dom", + "ndv": 31, "minValue": 1, - "maxValue": 10 + "maxValue": 31 }, { - "name": "s_geography_class", - "ndv": 2 + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "s_market_desc", - "ndv": 1311 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, 
+ "maxValue": 2100 }, { - "name": "s_market_manager", - "ndv": 1236 + "name": "d_fy_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "s_division_id", - "ndv": 1, + "name": "d_fy_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 1 + "maxValue": 10436 }, { - "name": "s_division_name", - "ndv": 2 + "name": "d_day_name", + "ndv": 7 }, { - "name": "s_company_id", - "ndv": 1, - "minValue": 1, - "maxValue": 1 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "s_company_name", + "name": "d_holiday", "ndv": 2 }, { - "name": "s_street_number", - "ndv": 736 + "name": "d_weekend", + "ndv": 2 }, { - "name": "s_street_name", - "ndv": 851 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "s_street_type", - "ndv": 21 - }, + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 + }, { - "name": "s_suite_number", - "ndv": 76 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "s_city", - "ndv": 267 + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 }, { - "name": "s_county", - "ndv": 128 + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 }, { - "name": "s_state", - "ndv": 44 + "name": "d_current_day", + "ndv": 1 }, { - "name": "s_zip", - "ndv": 983 + "name": "d_current_week", + "ndv": 1 }, { - "name": "s_country", + "name": "d_current_month", "ndv": 2 }, { - "name": "s_gmt_offset", - "ndv": 5, - "minValue": -9, - "maxValue": -5 + "name": "d_current_quarter", + "ndv": 2 }, { - "name": "s_tax_percentage", - "ndv": 12, - "minValue": 0, - "maxValue": 0.11 + "name": "d_current_year", + "ndv": 2 } ] }, { "id": "9", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + 
{ + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], + "type": { + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 902188800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } + }, + { + "literal": 903398400000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } + } + ] + }, + "rowCount": 18262.25 + }, + { + "id": "10", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "s_store_sk", - "s_store_id" + "d_date_sk" ], "exprs": [ { "input": 0, "name": "$0" - }, - { - "input": 1, - "name": "$1" } ], - "rowCount": 1704 + "rowCount": 18262.25 }, { - "id": "10", + "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", "condition": { "op": { @@ -1312,8 +1354,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 1, + "name": "$1" }, { "input": 6, @@ -1326,173 +1368,189 @@ "cost": "not available", "inputs": [ "7", - "9" + "10" ], - "rowCount": 1.880786982425933E13 + "rowCount": 2.015692609730516E14 }, { - "id": "11", + "id": "12", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "store" ], - "table:alias": "date_dim", + "table:alias": "store", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 1704, + "avgRowSize": 1375, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "s_store_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "s_store_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" + "name": "s_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_month_seq" + "name": "s_rec_end_date" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_week_seq" + "name": 
"s_closed_date_sk" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_quarter_seq" + "precision": 50, + "name": "s_store_name" }, { "type": "INTEGER", "nullable": true, - "name": "d_year" + "name": "s_number_employees" }, { "type": "INTEGER", "nullable": true, - "name": "d_dow" + "name": "s_floor_space" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_moy" + "precision": 20, + "name": "s_hours" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_dom" + "precision": 40, + "name": "s_manager" }, { "type": "INTEGER", "nullable": true, - "name": "d_qoy" + "name": "s_market_id" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_fy_year" + "precision": 100, + "name": "s_geography_class" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 100, + "name": "s_market_desc" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 40, + "name": "s_market_manager" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "s_division_id" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "s_division_name" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "s_company_id" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "s_company_name" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 10, + "name": "s_street_number" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" + "precision": 60, + "name": "s_street_name" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_first_dom" + "precision": 15, + "name": "s_street_type" }, { 
- "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_last_dom" + "precision": 10, + "name": "s_suite_number" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_same_day_ly" + "precision": 60, + "name": "s_city" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_same_day_lq" + "precision": 30, + "name": "s_county" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 2, + "name": "s_state" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "s_zip" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 20, + "name": "s_country" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "precision": 5, + "scale": 2, + "name": "s_gmt_offset" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 5, + "scale": 2, + "name": "s_tax_percentage" }, { "type": "BIGINT", @@ -1536,221 +1594,163 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 + "name": "s_store_sk", + "ndv": 1736, + "minValue": 1, + "maxValue": 1704 }, { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 + "name": "s_store_id", + "ndv": 879 }, { - "name": "d_date_id", - "ndv": 71022 + "name": "s_rec_start_date", + "ndv": 4, + "minValue": 9933, + "maxValue": 11394 }, { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 + "name": "s_rec_end_date", + "ndv": 3, + "minValue": 10663, + "maxValue": 11393 }, { - "name": "d_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "s_closed_date_sk", + "ndv": 263, + "minValue": 2450820, + "maxValue": 2451314 }, { - "name": "d_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + 
"name": "s_store_name", + "ndv": 11 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "s_number_employees", + "ndv": 100, + "minValue": 200, + "maxValue": 300 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "s_floor_space", + "ndv": 1289, + "minValue": 5000201, + "maxValue": 9997773 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "s_hours", + "ndv": 4 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "s_manager", + "ndv": 1245 }, { - "name": "d_qoy", - "ndv": 4, + "name": "s_market_id", + "ndv": 10, "minValue": 1, - "maxValue": 4 + "maxValue": 10 }, { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "s_geography_class", + "ndv": 2 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "s_market_desc", + "ndv": 1311 }, { - "name": "d_fy_week_seq", - "ndv": 11297, + "name": "s_market_manager", + "ndv": 1236 + }, + { + "name": "s_division_id", + "ndv": 1, "minValue": 1, - "maxValue": 10436 + "maxValue": 1 }, { - "name": "d_day_name", - "ndv": 7 + "name": "s_division_name", + "ndv": 2 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "s_company_id", + "ndv": 1, + "minValue": 1, + "maxValue": 1 }, { - "name": "d_holiday", + "name": "s_company_name", "ndv": 2 }, { - "name": "d_weekend", - "ndv": 2 + "name": "s_street_number", + "ndv": 736 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "s_street_name", + "ndv": 851 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "s_street_type", + "ndv": 21 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "s_suite_number", + "ndv": 76 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "s_city", + "ndv": 267 }, { - "name": "d_same_day_lq", - "ndv": 67904, - 
"minValue": 2414930, - "maxValue": 2487978 + "name": "s_county", + "ndv": 128 }, { - "name": "d_current_day", - "ndv": 1 + "name": "s_state", + "ndv": 44 }, { - "name": "d_current_week", - "ndv": 1 + "name": "s_zip", + "ndv": 983 }, { - "name": "d_current_month", + "name": "s_country", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "s_gmt_offset", + "ndv": 5, + "minValue": -9, + "maxValue": -5 }, { - "name": "d_current_year", - "ndv": 2 + "name": "s_tax_percentage", + "ndv": 12, + "minValue": 0, + "maxValue": 0.11 } ] }, - { - "id": "12", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" - }, - "operands": [ - { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 902188800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - }, - { - "literal": 903398400000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - } - ] - }, - "rowCount": 18262.25 - }, { "id": "13", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "s_store_sk", + "s_store_id" ], "exprs": [ { "input": 0, "name": "$0" + }, + { + "input": 1, + "name": "$1" } ], - "rowCount": 18262.25 + "rowCount": 1704 }, { "id": "14", @@ -1763,12 +1763,12 @@ }, "operands": [ { - "input": 1, - "name": "$1" + "input": 0, + "name": "$0" }, { - "input": 8, - "name": "$8" + "input": 7, + "name": "$7" } ] }, @@ -1776,7 +1776,7 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "10", + "11", "13" ], "rowCount": 51521103104711992 @@ -1785,7 +1785,7 @@ "id": "15", "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ - 7 + 8 ], "aggs": [ { @@ -3057,6 +3057,107 @@ }, { "id": "25", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + { + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], + "type": { + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 902188800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } + }, + { + "literal": 903398400000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } + } + ] + }, + "inputs": [ + "8" + ], + "rowCount": 18262.25 + }, + { + "id": "26", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", + "fields": [ + "d_date_sk" + ], + "exprs": [ + { + "input": 0, + "name": "$0" + } + ], + "rowCount": 18262.25 + }, + { + "id": "27", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 1, + "name": "$1" + }, + { + "input": 6, + "name": "$6" + } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "24", + "26" + ], + "rowCount": 1.0501012583922942E14 + }, + { + "id": "28", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", @@ -3165,166 +3266,65 @@ "maxValue": 46000 }, { - "name": "cp_catalog_page_id", - "ndv": 45891 - }, - { - "name": "cp_start_date_sk", - "ndv": 91, - "minValue": 2450815, - "maxValue": 2453005 - }, - { - "name": "cp_end_date_sk", - "ndv": 96, - "minValue": 2450844, - "maxValue": 2453186 - }, - 
{ - "name": "cp_department", - "ndv": 2 - }, - { - "name": "cp_catalog_number", - "ndv": 112, - "minValue": 1, - "maxValue": 109 - }, - { - "name": "cp_catalog_page_number", - "ndv": 427, - "minValue": 1, - "maxValue": 425 - }, - { - "name": "cp_description", - "ndv": 44192 - }, - { - "name": "cp_type", - "ndv": 4 - } - ] - }, - { - "id": "26", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", - "fields": [ - "cp_catalog_page_sk", - "cp_catalog_page_id" - ], - "exprs": [ - { - "input": 0, - "name": "$0" - }, - { - "input": 1, - "name": "$1" - } - ], - "rowCount": 46000 - }, - { - "id": "27", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 0, - "name": "$0" - }, - { - "input": 6, - "name": "$6" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "24", - "26" - ], - "rowCount": 264505512114036 - }, - { - "id": "28", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" - }, - "operands": [ - { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 902188800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - }, - { - "literal": 903398400000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - } - ] - }, - "inputs": [ - "11" - ], - "rowCount": 18262.25 + "name": "cp_catalog_page_id", + "ndv": 45891 + }, + { + "name": "cp_start_date_sk", + "ndv": 91, + "minValue": 2450815, + "maxValue": 2453005 + }, + { + 
"name": "cp_end_date_sk", + "ndv": 96, + "minValue": 2450844, + "maxValue": 2453186 + }, + { + "name": "cp_department", + "ndv": 2 + }, + { + "name": "cp_catalog_number", + "ndv": 112, + "minValue": 1, + "maxValue": 109 + }, + { + "name": "cp_catalog_page_number", + "ndv": 427, + "minValue": 1, + "maxValue": 425 + }, + { + "name": "cp_description", + "ndv": 44192 + }, + { + "name": "cp_type", + "ndv": 4 + } + ] }, { "id": "29", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "cp_catalog_page_sk", + "cp_catalog_page_id" ], "exprs": [ { "input": 0, "name": "$0" + }, + { + "input": 1, + "name": "$1" } ], - "rowCount": 18262.25 + "rowCount": 46000 }, { "id": "30", @@ -3337,12 +3337,12 @@ }, "operands": [ { - "input": 1, - "name": "$1" + "input": 0, + "name": "$0" }, { - "input": 8, - "name": "$8" + "input": 7, + "name": "$7" } ] }, @@ -3353,13 +3353,13 @@ "27", "29" ], - "rowCount": 724569868290683136 + "rowCount": 724569868290683008 }, { "id": "31", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ - 7 + 8 ], "aggs": [ { @@ -3435,7 +3435,7 @@ "name": null } ], - "rowCount": 72456986829068320 + "rowCount": 72456986829068304 }, { "id": "32", @@ -3503,7 +3503,7 @@ ] } ], - "rowCount": 72456986829068320 + "rowCount": 72456986829068304 }, { "id": "33", @@ -5170,6 +5170,107 @@ }, { "id": "46", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + { + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], + "type": { + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 902188800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + 
"precision": 9 + } + }, + { + "literal": 903398400000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } + } + ] + }, + "inputs": [ + "8" + ], + "rowCount": 18262.25 + }, + { + "id": "47", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", + "fields": [ + "d_date_sk" + ], + "exprs": [ + { + "input": 0, + "name": "$0" + } + ], + "rowCount": 18262.25 + }, + { + "id": "48", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 1, + "name": "$1" + }, + { + "input": 6, + "name": "$6" + } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "45", + "47" + ], + "rowCount": 2.2244630494994726E21 + }, + { + "id": "49", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", @@ -5385,13 +5486,13 @@ }, { "name": "web_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10089, "maxValue": 11550 }, { "name": "web_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10819, "maxValue": 11549 }, @@ -5498,7 +5599,7 @@ ] }, { - "id": "47", + "id": "50", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "web_site_sk", @@ -5517,7 +5618,7 @@ "rowCount": 84 }, { - "id": "48", + "id": "51", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", "condition": { "op": { @@ -5531,109 +5632,8 @@ "name": "$0" }, { - "input": 6, - "name": "$6" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "45", - "47" - ], - "rowCount": 1.023175655562462E19 - }, - { - "id": "49", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" - }, - "operands": [ - { - "literal": false, - 
"type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 902188800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - }, - { - "literal": 903398400000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - } - ] - }, - "inputs": [ - "11" - ], - "rowCount": 18262.25 - }, - { - "id": "50", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", - "fields": [ - "d_date_sk" - ], - "exprs": [ - { - "input": 0, - "name": "$0" - } - ], - "rowCount": 18262.25 - }, - { - "id": "51", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 1, - "name": "$1" - }, - { - "input": 8, - "name": "$8" + "input": 7, + "name": "$7" } ] }, @@ -5644,13 +5644,13 @@ "48", "50" ], - "rowCount": 2.802823442369336E22 + "rowCount": 2.8028234423693354E22 }, { "id": "52", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ - 7 + 8 ], "aggs": [ { @@ -5726,7 +5726,7 @@ "name": null } ], - "rowCount": 2.802823442369336E21 + "rowCount": 2.8028234423693355E21 }, { "id": "53", @@ -5794,7 +5794,7 @@ ] } ], - "rowCount": 2.802823442369336E21 + "rowCount": 2.8028234423693355E21 }, { "id": "54", @@ -5805,7 +5805,7 @@ "32", "53" ], - "rowCount": 2.8029010514664756E21 + "rowCount": 2.802901051466475E21 }, { "id": "55", @@ -5839,7 +5839,7 @@ "name": "$4" } ], - "rowCount": 2.8029010514664756E21 + "rowCount": 2.802901051466475E21 }, { "id": "56", @@ -5914,7 +5914,7 @@ "name": null } ], - "rowCount": 8.4079398359428E20 + "rowCount": 8.407939835942797E20 }, { "id": "57", @@ -5948,7 +5948,7 @@ "name": "$4" } ], - 
"rowCount": 8.4079398359428E20 + "rowCount": 8.407939835942797E20 }, { "id": "58", diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query6.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query6.q.out index 327a6ca1232b..eda0be9752a0 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query6.q.out @@ -520,7 +520,7 @@ Warning: Map Join MAPJOIN[168][bigTable=?] in task 'Map 1' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1649,13 +1649,13 @@ Warning: Map Join MAPJOIN[168][bigTable=?] in task 'Map 1' is a cross product }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -2003,13 +2003,13 @@ Warning: Map Join MAPJOIN[168][bigTable=?] in task 'Map 1' is a cross product }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -2679,7 +2679,7 @@ Warning: Map Join MAPJOIN[168][bigTable=?] 
in task 'Map 1' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query7.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query7.q.out index 88622d672f1d..28e511430522 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query7.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query7.q.out @@ -662,7 +662,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1676,13 +1676,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query8.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query8.q.out index 785effc7ee8e..995024095950 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query8.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query8.q.out @@ -630,7 +630,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -5591,13 +5591,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out b/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out index 82e0a09c6a79..7fd56312c5af 100644 --- a/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out +++ b/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out @@ -60,7 +60,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_nonacid_directinsert_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map 
Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -171,7 +171,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_mm_directinsert_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -283,7 +283,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_acid_directinsert_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -396,7 +396,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_mm_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -508,7 +508,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_acid_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -621,7 +621,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_mm_unflattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -734,7 +734,7 @@ Stage-3 <-Map 
1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_acid_unflattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -847,7 +847,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_mm_directinsert_unflattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -960,7 +960,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_acid_nondirectinsert_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=84) + Map Join Operator [MAPJOIN_56] (rows=2 width=96) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] From 459e85fc94c2faa46df22b4f153f0ff680ae7cd0 Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Fri, 6 Feb 2026 16:41:45 -0800 Subject: [PATCH 13/14] HIVE-29368: PR feedback + some SonarQube items --- .../hadoop/hive/ql/plan/Statistics.java | 31 ++--- .../estimator/BranchingStatEstimator.java | 12 +- .../ql/udf/generic/GenericUDFCoalesce.java | 56 ++++++--- .../annotation/TestStatsRulesProcFactory.java | 46 +++++++ .../hadoop/hive/ql/plan/TestStatistics.java | 118 ++++++++++++++++++ .../TestPessimisticStatCombiner.java | 43 ++----- .../TestGenericUDFCoalesceStatEstimator.java | 96 +++++++------- 7 files changed, 283 insertions(+), 119 deletions(-) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/plan/TestStatistics.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java index 0b5f6605fc68..b052304a639f 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java @@ -240,37 +240,30 @@ public void setColumnStats(List colStats) { } public void addToColumnStats(List colStats) { - if (columnStats == null) { columnStats = Maps.newHashMap(); } if (colStats != null) { for (ColStatistics cs : colStats) { - ColStatistics updatedCS = null; if (cs != null) { - - String key = cs.getColumnName(); - // if column statistics for a column is already found then merge the statistics - if (columnStats.containsKey(key) && columnStats.get(key) != null) { - updatedCS = columnStats.get(key); - updatedCS.setAvgColLen(Math.max(updatedCS.getAvgColLen(), cs.getAvgColLen())); - updatedCS.setNumNulls(StatsUtils.safeAdd(updatedCS.getNumNulls(), cs.getNumNulls())); - if (updatedCS.getCountDistint() > 0 && cs.getCountDistint() > 0) { - updatedCS.setCountDistint(Math.max(updatedCS.getCountDistint(), cs.getCountDistint())); - } else { - // If one is unknown, the product is also unknown - updatedCS.setCountDistint(0); - } - columnStats.put(key, updatedCS); - } else { - columnStats.put(key, cs); - } + columnStats.merge(cs.getColumnName(), cs, Statistics::mergeColStats); } } } } + private static ColStatistics mergeColStats(ColStatistics existing, ColStatistics incoming) { + existing.setAvgColLen(Math.max(existing.getAvgColLen(), incoming.getAvgColLen())); + existing.setNumNulls(StatsUtils.safeAdd(existing.getNumNulls(), incoming.getNumNulls())); + if (existing.getCountDistint() > 0 && incoming.getCountDistint() > 0) { + existing.setCountDistint(Math.max(existing.getCountDistint(), incoming.getCountDistint())); + } else { + existing.setCountDistint(0); + } + return existing; + } + public void updateColumnStatsState(State newState) { this.columnStatsState = inferColumnStatsState(columnStatsState, newState); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/BranchingStatEstimator.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/BranchingStatEstimator.java index 2fe6bd2ce35c..4628884062cc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/BranchingStatEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/BranchingStatEstimator.java @@ -38,12 +38,14 @@ protected BranchingStatEstimator(int numberOfDistinctConstants) { public Optional estimate(List argStats) { PessimisticStatCombiner combiner = new PessimisticStatCombiner(); addBranchStats(combiner, argStats); - if (numberOfDistinctConstants > 1) { - ColStatistics constantsStat = new ColStatistics("_constants", "string"); - constantsStat.setCountDistint(numberOfDistinctConstants); - combiner.add(constantsStat); + Optional result = combiner.getResult(); + if (result.isPresent()) { + ColStatistics stat = result.get(); + if (numberOfDistinctConstants > stat.getCountDistint() && stat.getCountDistint() > 0) { + stat.setCountDistint(numberOfDistinctConstants); + } } - return combiner.getResult(); + return result; } protected abstract void addBranchStats(PessimisticStatCombiner combiner, List argStats); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java index 600c81908e46..92eff7456643 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java @@ -18,16 +18,14 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.util.HashSet; import java.util.List; -import java.util.Set; +import java.util.Optional; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ColStatistics; -import 
org.apache.hadoop.hive.ql.stats.estimator.BranchingStatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.PessimisticStatCombiner; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider; @@ -48,7 +46,7 @@ public class GenericUDFCoalesce extends GenericUDF implements StatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; - private transient int numberOfDistinctConstants; + private transient int firstConstantIndex = -1; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentTypeException { @@ -56,7 +54,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen argumentOIs = arguments; returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); - Set distinctConstants = new HashSet<>(); + firstConstantIndex = -1; for (int i = 0; i < arguments.length; i++) { if (!returnOIResolver.update(arguments[i])) { @@ -66,13 +64,11 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen + "\" is expected but \"" + arguments[i].getTypeName() + "\" is found"); } - if (arguments[i] instanceof ConstantObjectInspector) { - distinctConstants.add(((ConstantObjectInspector) arguments[i]).getWritableConstantValue()); + if (firstConstantIndex < 0 && arguments[i] instanceof ConstantObjectInspector) { + firstConstantIndex = i; } } - numberOfDistinctConstants = distinctConstants.size(); - return returnOIResolver.get(); } @@ -95,19 +91,47 @@ public String getDisplayString(String[] children) { @Override public StatEstimator getStatEstimator() { - return new CoalesceStatEstimator(numberOfDistinctConstants); + return new CoalesceStatEstimator(firstConstantIndex); } - static class CoalesceStatEstimator extends BranchingStatEstimator { - CoalesceStatEstimator(int numberOfDistinctConstants) { - 
super(numberOfDistinctConstants); + /** + * COALESCE returns the first non-null argument, so only values before (and including) + * the first constant are reachable. Constants after the first one can never be returned. + */ + static class CoalesceStatEstimator implements StatEstimator { + private final int firstConstantIndex; + + CoalesceStatEstimator(int firstConstantIndex) { + this.firstConstantIndex = firstConstantIndex; } @Override - protected void addBranchStats(PessimisticStatCombiner combiner, List argStats) { - for (ColStatistics argStat : argStats) { - combiner.add(argStat); + public Optional estimate(List argStats) { + PessimisticStatCombiner combiner = new PessimisticStatCombiner(); + + if (firstConstantIndex == 0) { + // First arg is constant - always returns that constant, NDV = 1 + combiner.add(argStats.get(0)); + return combiner.getResult(); + } + + // Combine stats of columns before the first constant (or all if no constant) + int limit = firstConstantIndex > 0 ? firstConstantIndex : argStats.size(); + for (int i = 0; i < limit; i++) { + combiner.add(argStats.get(i)); } + + Optional result = combiner.getResult(); + + // If there's a constant after columns, add 1 to NDV for that constant + if (result.isPresent() && firstConstantIndex > 0) { + ColStatistics stat = result.get(); + if (stat.getCountDistint() > 0) { + stat.setCountDistint(stat.getCountDistint() + 1); + } + } + + return result; } } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java index 1290a54c1f3c..fd40cbeddd81 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java @@ -43,7 +43,9 @@ import static 
org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.FilterStatsRule.extractFloatFromLiteralValue; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThrows; +import static org.junit.Assert.assertTrue; public class TestStatsRulesProcFactory { @@ -631,4 +633,48 @@ private static ColStatistics createColStatistics( return colStatistics; } + + @Test + public void testSatisfyPreconditionJoinKeysEmptyTableZeroRows() { + Statistics stats = new Statistics(0, 0, 0, 0); + ColStatistics colStats = new ColStatistics("key", "string"); + colStats.setCountDistint(0L); + stats.addToColumnStats(Collections.singletonList(colStats)); + + // Empty table (numRows=0) with NDV=0: should return true since NDV=0 is legitimate + assertTrue(StatsRulesProcFactory.satisfyPrecondition(stats, Arrays.asList("key"))); + } + + @Test + public void testSatisfyPreconditionJoinKeysEmptyTableOneRow() { + Statistics stats = new Statistics(1, 0, 0, 0); + ColStatistics colStats = new ColStatistics("key", "string"); + colStats.setCountDistint(0L); + stats.addToColumnStats(Collections.singletonList(colStats)); + + // Near-empty table (numRows=1, bumped from 0) with NDV=0: should return true + assertTrue(StatsRulesProcFactory.satisfyPrecondition(stats, Arrays.asList("key"))); + } + + @Test + public void testSatisfyPreconditionJoinKeysNonEmptyTableZeroNdv() { + Statistics stats = new Statistics(2, 0, 0, 0); + ColStatistics colStats = new ColStatistics("key", "string"); + colStats.setCountDistint(0L); + stats.addToColumnStats(Collections.singletonList(colStats)); + + // Non-empty table (numRows=2) with NDV=0: should return false (unknown stats) + assertFalse(StatsRulesProcFactory.satisfyPrecondition(stats, Arrays.asList("key"))); + } + + @Test + public void testSatisfyPreconditionJoinKeysNonEmptyTablePositiveNdv() { + Statistics stats = new Statistics(2, 0, 0, 0); + ColStatistics colStats = new ColStatistics("key", 
"string"); + colStats.setCountDistint(1L); + stats.addToColumnStats(Collections.singletonList(colStats)); + + // Non-empty table with positive NDV: should return true + assertTrue(StatsRulesProcFactory.satisfyPrecondition(stats, Arrays.asList("key"))); + } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestStatistics.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestStatistics.java new file mode 100644 index 000000000000..7093feef5170 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestStatistics.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.plan; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.Arrays; +import java.util.Collections; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; + +class TestStatistics { + + @Test + void testAddToColumnStatsSingleColumn() { + Statistics stats = new Statistics(100, 1000, 0, 0); + ColStatistics colStats = createColStats("col1", 50, 10, 8.0); + + stats.addToColumnStats(Collections.singletonList(colStats)); + + ColStatistics result = stats.getColumnStatisticsFromColName("col1"); + assertEquals(50, result.getCountDistint()); + assertEquals(10, result.getNumNulls()); + assertEquals(8.0, result.getAvgColLen()); + } + + @Test + void testAddToColumnStatsMergesTakesMaxAvgColLen() { + Statistics stats = new Statistics(100, 1000, 0, 0); + ColStatistics first = createColStats("col1", 50, 10, 8.0); + ColStatistics second = createColStats("col1", 60, 20, 12.0); + + stats.addToColumnStats(Collections.singletonList(first)); + stats.addToColumnStats(Collections.singletonList(second)); + + ColStatistics result = stats.getColumnStatisticsFromColName("col1"); + assertEquals(12.0, result.getAvgColLen()); + } + + @Test + void testAddToColumnStatsMergesSumsNumNulls() { + Statistics stats = new Statistics(100, 1000, 0, 0); + ColStatistics first = createColStats("col1", 50, 10, 8.0); + ColStatistics second = createColStats("col1", 60, 20, 8.0); + + stats.addToColumnStats(Collections.singletonList(first)); + stats.addToColumnStats(Collections.singletonList(second)); + + ColStatistics result = stats.getColumnStatisticsFromColName("col1"); + assertEquals(30, result.getNumNulls()); + } + + @ParameterizedTest(name = "ndv1={0}, ndv2={1} -> expected={2}") + @CsvSource({ + "50, 60, 60", // both known, takes max + "50, 0, 0", // second unknown, result unknown + "0, 60, 0" // first unknown, result unknown + }) + void 
testAddToColumnStatsMergesNdv(long ndv1, long ndv2, long expectedNdv) { + Statistics stats = new Statistics(100, 1000, 0, 0); + ColStatistics first = createColStats("col1", ndv1, 10, 8.0); + ColStatistics second = createColStats("col1", ndv2, 20, 8.0); + + stats.addToColumnStats(Collections.singletonList(first)); + stats.addToColumnStats(Collections.singletonList(second)); + + ColStatistics result = stats.getColumnStatisticsFromColName("col1"); + assertEquals(expectedNdv, result.getCountDistint()); + } + + @Test + void testAddToColumnStatsNullListIsNoOp() { + Statistics stats = new Statistics(100, 1000, 0, 0); + ColStatistics colStats = createColStats("col1", 50, 10, 8.0); + stats.addToColumnStats(Collections.singletonList(colStats)); + + stats.addToColumnStats(null); + + assertEquals(1, stats.getColumnStats().size()); + } + + @Test + void testAddToColumnStatsNullElementIsSkipped() { + Statistics stats = new Statistics(100, 1000, 0, 0); + ColStatistics colStats = createColStats("col1", 50, 10, 8.0); + + stats.addToColumnStats(Arrays.asList(null, colStats, null)); + + assertEquals(1, stats.getColumnStats().size()); + assertEquals(50, stats.getColumnStatisticsFromColName("col1").getCountDistint()); + } + + private ColStatistics createColStats(String name, long ndv, long numNulls, double avgColLen) { + ColStatistics cs = new ColStatistics(name, "string"); + cs.setCountDistint(ndv); + cs.setNumNulls(numNulls); + cs.setAvgColLen(avgColLen); + return cs; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java index 9cfc561a5358..aa2a0a6184ae 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java @@ -27,6 +27,8 @@ import org.apache.hadoop.hive.ql.plan.ColStatistics; import 
org.apache.hadoop.hive.ql.plan.ColStatistics.Range; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; class TestPessimisticStatCombiner { @@ -77,43 +79,22 @@ void testCombineTakesMaxOfNumNulls() { assertEquals(200, combined.getNumNulls()); } - @Test - void testCombineTakesMaxOfNdv() { - ColStatistics stat1 = createStat("col1", "int", 100, 10, 4.0); - ColStatistics stat2 = createStat("col2", "int", 200, 20, 4.0); - - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); - combiner.add(stat1); - combiner.add(stat2); - - ColStatistics combined = combiner.getResult().get(); - assertEquals(200, combined.getCountDistint()); - } - - @Test - void testCombineWithUnknownNdvReturnsZero() { - ColStatistics stat1 = createStat("col1", "int", 100, 10, 4.0); - ColStatistics stat2 = createStat("col2", "int", 0, 20, 4.0); - - PessimisticStatCombiner combiner = new PessimisticStatCombiner(); - combiner.add(stat1); - combiner.add(stat2); - - ColStatistics combined = combiner.getResult().get(); - assertEquals(0, combined.getCountDistint()); - } - - @Test - void testCombineWithFirstStatUnknownNdvReturnsZero() { - ColStatistics stat1 = createStat("col1", "int", 0, 10, 4.0); - ColStatistics stat2 = createStat("col2", "int", 200, 20, 4.0); + @ParameterizedTest(name = "ndv1={0}, ndv2={1} -> expected={2}") + @CsvSource({ + "100, 200, 200", // takes max when both known + "100, 0, 0", // unknown (0) in second propagates + "0, 200, 0" // unknown (0) in first propagates + }) + void testCombineNdvBehavior(long ndv1, long ndv2, long expectedNdv) { + ColStatistics stat1 = createStat("col1", "int", ndv1, 10, 4.0); + ColStatistics stat2 = createStat("col2", "int", ndv2, 20, 4.0); PessimisticStatCombiner combiner = new PessimisticStatCombiner(); combiner.add(stat1); combiner.add(stat2); ColStatistics combined = combiner.getResult().get(); - assertEquals(0, combined.getCountDistint()); + 
assertEquals(expectedNdv, combined.getCountDistint()); } @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java index 49fc1a8a4a83..60989ac60fe5 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFCoalesceStatEstimator.java @@ -36,7 +36,8 @@ class TestGenericUDFCoalesceStatEstimator { @Test - void testAllArgumentsConstantDistinctValues() throws UDFArgumentTypeException { + void testAllArgumentsConstant() throws UDFArgumentTypeException { + // COALESCE('A', 'B', 'C') - first constant 'A' is always returned GenericUDFCoalesce udf = new GenericUDFCoalesce(); ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( @@ -50,68 +51,71 @@ void testAllArgumentsConstantDistinctValues() throws UDFArgumentTypeException { StatEstimator estimator = udf.getStatEstimator(); - Optional result = estimator.estimate(Arrays.asList( - createColStats("arg1", 1, 0), - createColStats("arg2", 1, 0), - createColStats("arg3", 1, 0))); + ColStatistics arg1Stats = createColStats("arg1", 1, 0); + arg1Stats.setAvgColLen(5.0); + ColStatistics arg2Stats = createColStats("arg2", 1, 0); + arg2Stats.setAvgColLen(25.0); + ColStatistics arg3Stats = createColStats("arg3", 1, 0); + arg3Stats.setAvgColLen(15.0); + + Optional result = estimator.estimate(Arrays.asList(arg1Stats, arg2Stats, arg3Stats)); assertTrue(result.isPresent()); - assertEquals(3, result.get().getCountDistint()); + assertEquals(1, result.get().getCountDistint()); + assertEquals(5.0, result.get().getAvgColLen()); } @Test - void testAllArgumentsConstantWithDuplicates() throws UDFArgumentTypeException { + void testSingleConstantArgument() throws UDFArgumentTypeException { GenericUDFCoalesce udf = new GenericUDFCoalesce(); 
ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( TypeInfoFactory.stringTypeInfo, new Text("A")); - ObjectInspector constA2 = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - TypeInfoFactory.stringTypeInfo, new Text("A")); - ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - TypeInfoFactory.stringTypeInfo, new Text("B")); - udf.initialize(new ObjectInspector[]{constA, constA2, constB}); + udf.initialize(new ObjectInspector[]{constA}); StatEstimator estimator = udf.getStatEstimator(); Optional result = estimator.estimate(Arrays.asList( - createColStats("arg1", 1, 0), - createColStats("arg2", 1, 0), - createColStats("arg3", 1, 0))); + createColStats("arg1", 1, 0))); assertTrue(result.isPresent()); - assertEquals(2, result.get().getCountDistint()); + assertEquals(1, result.get().getCountDistint()); } @Test - void testSingleConstantArgument() throws UDFArgumentTypeException { + void testMixedConstantAndNonConstantArguments() throws UDFArgumentTypeException { + // COALESCE('A', nonConst, 'C') - first arg is constant 'A', always returned, NDV = 1 GenericUDFCoalesce udf = new GenericUDFCoalesce(); ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( TypeInfoFactory.stringTypeInfo, new Text("A")); + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("C")); - udf.initialize(new ObjectInspector[]{constA}); + udf.initialize(new ObjectInspector[]{constA, nonConst, constC}); StatEstimator estimator = udf.getStatEstimator(); + // Constants have NDV=1, non-constants have their actual NDV Optional result = estimator.estimate(Arrays.asList( - createColStats("arg1", 1, 0))); + createColStats("constA", 1, 0), + 
createColStats("col", 200, 20), + createColStats("constC", 1, 0))); assertTrue(result.isPresent()); assertEquals(1, result.get().getCountDistint()); } @Test - void testMixedConstantAndNonConstantArguments() throws UDFArgumentTypeException { + void testAllNonConstantArguments() throws UDFArgumentTypeException { + // COALESCE(col1, col2, col3) - no constants, NDV = max of all columns GenericUDFCoalesce udf = new GenericUDFCoalesce(); - ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - TypeInfoFactory.stringTypeInfo, new Text("A")); ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; - ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - TypeInfoFactory.stringTypeInfo, new Text("C")); - udf.initialize(new ObjectInspector[]{constA, nonConst, constC}); + udf.initialize(new ObjectInspector[]{nonConst, nonConst, nonConst}); StatEstimator estimator = udf.getStatEstimator(); @@ -125,50 +129,46 @@ void testMixedConstantAndNonConstantArguments() throws UDFArgumentTypeException } @Test - void testAllNonConstantArguments() throws UDFArgumentTypeException { + void testColumnThenConstant() throws UDFArgumentTypeException { + // COALESCE(col, 'default') - returns col values OR 'default', NDV = NDV(col) + 1 GenericUDFCoalesce udf = new GenericUDFCoalesce(); ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector constDefault = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("default")); - udf.initialize(new ObjectInspector[]{nonConst, nonConst, nonConst}); + udf.initialize(new ObjectInspector[]{nonConst, constDefault}); StatEstimator estimator = udf.getStatEstimator(); Optional result = estimator.estimate(Arrays.asList( - createColStats("arg1", 100, 10), - createColStats("arg2", 200, 20), - createColStats("arg3", 300, 
30))); + createColStats("col", 100, 10), + createColStats("const", 1, 0))); assertTrue(result.isPresent()); - assertEquals(300, result.get().getCountDistint()); + assertEquals(101, result.get().getCountDistint()); } @Test - void testConstantArgumentsTakesMaxAvgColLen() throws UDFArgumentTypeException { + void testMultipleColumnsThenConstant() throws UDFArgumentTypeException { + // COALESCE(col1, col2, 'default') - returns col1, col2, or 'default', NDV = max(col1, col2) + 1 GenericUDFCoalesce udf = new GenericUDFCoalesce(); - ObjectInspector constA = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - TypeInfoFactory.stringTypeInfo, new Text("A")); - ObjectInspector constB = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - TypeInfoFactory.stringTypeInfo, new Text("B")); - ObjectInspector constC = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - TypeInfoFactory.stringTypeInfo, new Text("C")); + ObjectInspector nonConst = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + ObjectInspector constDefault = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.stringTypeInfo, new Text("default")); - udf.initialize(new ObjectInspector[]{constA, constB, constC}); + udf.initialize(new ObjectInspector[]{nonConst, nonConst, constDefault}); StatEstimator estimator = udf.getStatEstimator(); - ColStatistics arg1Stats = createColStats("arg1", 100, 10); - arg1Stats.setAvgColLen(5.0); - ColStatistics arg2Stats = createColStats("arg2", 200, 20); - arg2Stats.setAvgColLen(25.0); - ColStatistics arg3Stats = createColStats("arg3", 300, 30); - arg3Stats.setAvgColLen(15.0); - - Optional result = estimator.estimate(Arrays.asList(arg1Stats, arg2Stats, arg3Stats)); + Optional result = estimator.estimate(Arrays.asList( + createColStats("col1", 100, 10), + createColStats("col2", 200, 20), + createColStats("const", 1, 0))); assertTrue(result.isPresent()); - 
assertEquals(25.0, result.get().getAvgColLen()); + assertEquals(201, result.get().getCountDistint()); } private ColStatistics createColStats(String name, long ndv, long numNulls) { From b59cc9dd645b8d5859307ffcbe6a6155cfb8fc7d Mon Sep 17 00:00:00 2001 From: Konstantin Bereznyakov Date: Fri, 6 Feb 2026 18:10:16 -0800 Subject: [PATCH 14/14] HIVE-29368: .out files + a misc tweak for NULL IF NDVs --- .../hive/ql/udf/generic/GenericUDFIf.java | 8 +- .../TestGenericUDFIfStatEstimator.java | 27 +- .../clientpositive/llap/auto_join32.q.out | 14 +- .../clientpositive/llap/cbo_const.q.out | 10 +- .../clientpositive/llap/constant_prop_3.q.out | 28 +- .../clientpositive/llap/deleteAnalyze.q.out | 4 +- .../llap/materialized_view_rewrite_5.q.out | 12 +- .../llap/materialized_view_rewrite_7.q.out | 8 +- .../perf/tpcds30tb/json/query21.q.out | 890 +++++----- .../perf/tpcds30tb/json/query22.q.out | 6 +- .../perf/tpcds30tb/json/query23.q.out | 10 +- .../perf/tpcds30tb/json/query24.q.out | 8 +- .../perf/tpcds30tb/json/query25.q.out | 14 +- .../perf/tpcds30tb/json/query26.q.out | 6 +- .../perf/tpcds30tb/json/query27.q.out | 10 +- .../perf/tpcds30tb/json/query29.q.out | 14 +- .../perf/tpcds30tb/json/query30.q.out | 2 +- .../perf/tpcds30tb/json/query31.q.out | 2 +- .../perf/tpcds30tb/json/query32.q.out | 696 ++++---- .../perf/tpcds30tb/json/query33.q.out | 10 +- .../perf/tpcds30tb/json/query34.q.out | 6 +- .../perf/tpcds30tb/json/query35.q.out | 2 +- .../perf/tpcds30tb/json/query36.q.out | 10 +- .../perf/tpcds30tb/json/query37.q.out | 1170 ++++++------- .../perf/tpcds30tb/json/query38.q.out | 2 +- .../perf/tpcds30tb/json/query39.q.out | 2 +- .../perf/tpcds30tb/json/query40.q.out | 790 ++++----- .../perf/tpcds30tb/json/query41.q.out | 8 +- .../perf/tpcds30tb/json/query42.q.out | 6 +- .../perf/tpcds30tb/json/query43.q.out | 6 +- .../perf/tpcds30tb/json/query44.q.out | 8 +- .../perf/tpcds30tb/json/query45.q.out | 6 +- .../perf/tpcds30tb/json/query46.q.out | 6 +- 
.../perf/tpcds30tb/json/query47.q.out | 10 +- .../perf/tpcds30tb/json/query48.q.out | 2 +- .../perf/tpcds30tb/json/query49.q.out | 2 +- .../perf/tpcds30tb/json/query50.q.out | 6 +- .../perf/tpcds30tb/json/query51.q.out | 2 +- .../perf/tpcds30tb/json/query52.q.out | 6 +- .../perf/tpcds30tb/json/query53.q.out | 6 +- .../perf/tpcds30tb/json/query54.q.out | 14 +- .../perf/tpcds30tb/json/query55.q.out | 6 +- .../perf/tpcds30tb/json/query56.q.out | 10 +- .../perf/tpcds30tb/json/query57.q.out | 10 +- .../perf/tpcds30tb/json/query58.q.out | 139 +- .../perf/tpcds30tb/json/query59.q.out | 12 +- .../perf/tpcds30tb/json/query60.q.out | 10 +- .../perf/tpcds30tb/json/query61.q.out | 10 +- .../perf/tpcds30tb/json/query62.q.out | 6 +- .../perf/tpcds30tb/json/query63.q.out | 6 +- .../perf/tpcds30tb/json/query64.q.out | 14 +- .../perf/tpcds30tb/json/query65.q.out | 10 +- .../perf/tpcds30tb/json/query66.q.out | 2 +- .../perf/tpcds30tb/json/query67.q.out | 10 +- .../perf/tpcds30tb/json/query68.q.out | 6 +- .../perf/tpcds30tb/json/query69.q.out | 2 +- .../perf/tpcds30tb/json/query70.q.out | 12 +- .../perf/tpcds30tb/json/query71.q.out | 6 +- .../perf/tpcds30tb/json/query72.q.out | 10 +- .../perf/tpcds30tb/json/query73.q.out | 6 +- .../perf/tpcds30tb/json/query74.q.out | 2 +- .../perf/tpcds30tb/json/query75.q.out | 6 +- .../perf/tpcds30tb/json/query76.q.out | 6 +- .../perf/tpcds30tb/json/query77.q.out | 2 +- .../perf/tpcds30tb/json/query78.q.out | 2 +- .../perf/tpcds30tb/json/query79.q.out | 6 +- .../perf/tpcds30tb/json/query80.q.out | 1202 +++++++------- .../perf/tpcds30tb/json/query81.q.out | 2 +- .../perf/tpcds30tb/json/query82.q.out | 1170 ++++++------- .../perf/tpcds30tb/json/query83.q.out | 8 +- .../perf/tpcds30tb/json/query85.q.out | 2 +- .../perf/tpcds30tb/json/query86.q.out | 6 +- .../perf/tpcds30tb/json/query87.q.out | 2 +- .../perf/tpcds30tb/json/query88.q.out | 4 +- .../perf/tpcds30tb/json/query89.q.out | 10 +- .../perf/tpcds30tb/json/query90.q.out | 4 +- 
.../perf/tpcds30tb/json/query91.q.out | 6 +- .../perf/tpcds30tb/json/query92.q.out | 1102 ++++++------- .../perf/tpcds30tb/json/query94.q.out | 1442 ++++++++-------- .../perf/tpcds30tb/json/query95.q.out | 1444 ++++++++--------- .../perf/tpcds30tb/json/query96.q.out | 4 +- .../perf/tpcds30tb/json/query97.q.out | 2 +- .../perf/tpcds30tb/json/query98.q.out | 738 ++++----- .../perf/tpcds30tb/json/query99.q.out | 6 +- .../tez/flatten_union_subdir.q.out | 82 +- 85 files changed, 5714 insertions(+), 5682 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index e9004b7a2b6e..6be495cfed78 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -165,7 +165,13 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen if (thenIsConstant && elseIsConstant) { Object thenValue = ((ConstantObjectInspector) arguments[1]).getWritableConstantValue(); Object elseValue = ((ConstantObjectInspector) arguments[2]).getWritableConstantValue(); - numberOfDistinctConstants = Objects.equals(thenValue, elseValue) ? 1 : 2; + if (thenValue == null && elseValue == null) { + numberOfDistinctConstants = 0; + } else if (thenValue == null || elseValue == null) { + numberOfDistinctConstants = 1; + } else { + numberOfDistinctConstants = Objects.equals(thenValue, elseValue) ? 
1 : 2; + } } else if (thenIsConstant || elseIsConstant) { numberOfDistinctConstants = 1; } else { diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java index 83d3acff9798..f73bea685564 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFIfStatEstimator.java @@ -27,10 +27,12 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.plan.ColStatistics; import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.Text; +import org.apache.hadoop.hive.common.type.Timestamp; import org.junit.jupiter.api.Test; class TestGenericUDFIfStatEstimator { @@ -166,7 +168,7 @@ void testNullAndNonNullConstants() throws UDFArgumentException { createColStats("else_col", 1, 0))); assertTrue(result.isPresent()); - assertEquals(2, result.get().getCountDistint()); + assertEquals(1, result.get().getCountDistint()); } @Test @@ -192,6 +194,29 @@ void testBothNullConstants() throws UDFArgumentException { assertEquals(1, result.get().getCountDistint()); } + @Test + void testTimestampConstantAndNullConstant() throws UDFArgumentException { + GenericUDFIf udf = new GenericUDFIf(); + + ObjectInspector conditionOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; + ObjectInspector thenOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.timestampTypeInfo, new TimestampWritableV2(Timestamp.valueOf("2011-01-01 01:01:01"))); + ObjectInspector 
elseOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.timestampTypeInfo, null); + + udf.initialize(new ObjectInspector[]{conditionOI, thenOI, elseOI}); + + StatEstimator estimator = udf.getStatEstimator(); + + Optional result = estimator.estimate( + Arrays.asList(createColStats("cond", 2, 0), + createColStats("then_col", 1, 0), + createColStats("else_col", 1, 0))); + + assertTrue(result.isPresent()); + assertEquals(1, result.get().getCountDistint()); + } + @Test void testConstantBranchesTakesMaxAvgColLen() throws UDFArgumentException { GenericUDFIf udf = new GenericUDFIf(); diff --git a/ql/src/test/results/clientpositive/llap/auto_join32.q.out b/ql/src/test/results/clientpositive/llap/auto_join32.q.out index 3f1783db3a03..3a7e7b404fcb 100644 --- a/ql/src/test/results/clientpositive/llap/auto_join32.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join32.q.out @@ -546,7 +546,7 @@ STAGE PLANS: TableScan alias: s filterExpr: ((p = 'bar') and name is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_34_container, bigKeyColName:name, smallTablePos:1, keyRatio:1.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_34_container, bigKeyColName:name, smallTablePos:1, keyRatio:0.0 Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((p = 'bar') and name is not null) (type: boolean) @@ -564,19 +564,19 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col2 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: unknown Map 3 @@ -608,16 +608,16 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/cbo_const.q.out b/ql/src/test/results/clientpositive/llap/cbo_const.q.out index 43b0bf4577ad..0b9e4d682011 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_const.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_const.q.out @@ -298,13 +298,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2 - Statistics: Num rows: 13 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + 
Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 2310 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Reducer 3 Execution mode: llap @@ -316,14 +316,14 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col2, _col4 - Statistics: Num rows: 275 Data size: 48950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 48950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 275 Data size: 48950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out b/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out index 412038304bcc..51932053eab0 100644 --- a/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out +++ b/ql/src/test/results/clientpositive/llap/constant_prop_3.q.out @@ -317,30 +317,30 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col7, _col9 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col7 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: 
bigint), _col5 (type: bigint), _col9 (type: boolean) outputColumnNames: _col1, _col3, _col4, _col5, _col6, _col7, _col9 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col6 = 0L) or (_col9 is null and (_col7 >= _col6) and _col1 is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col3 (type: string), _col4 (type: string), _col5 (type: int) outputColumnNames: _col1, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col3 (type: string), _col4 (type: string), _col5 (type: int), _col1 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int) null sort order: zzzz sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: vectorized, llap Reduce Operator Tree: @@ -348,32 +348,32 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 211 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col0 (type: string), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) keys: _col1 (type: string), _col2 (type: string), _col3 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: bigint), _col0 (type: string), _col1 (type: string), _col2 (type: int) null sort order: azzz sort order: -+++ - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: int), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 211 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -388,13 +388,13 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: 
_col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col3 (type: boolean) Reducer 9 Execution mode: vectorized, llap diff --git a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out index 6da801115ace..5068a86de524 100644 --- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out @@ -173,9 +173,9 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_10] - Select Operator [SEL_9] (rows=1 width=96) + Select Operator [SEL_9] (rows=1 width=228) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_25] (rows=1 width=96) + Merge Join Operator [MERGEJOIN_25] (rows=1 width=228) Conds:RS_28._col1=RS_31._col0(Inner),Output:["_col0","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_28] diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out index b98b0853d686..1d5de69e74b5 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_5.q.out @@ -986,20 +986,20 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col3 - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
aggregations: sum(_col3) keys: _col1 (type: varchar(256)) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: varchar(256)) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: varchar(256)) - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -1009,10 +1009,10 @@ STAGE PLANS: keys: KEY._col0 (type: varchar(256)) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out index 06a4b05a6f8b..5ebc9161f00a 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out @@ -305,7 +305,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: 
_col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -411,7 +411,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int), _col1 (type: int) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -707,7 +707,7 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE @@ -773,7 +773,7 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) - minReductionHashAggr: 0.5 + minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query21.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query21.q.out index 5a956dce8c47..cdd3959baa76 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query21.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query21.q.out @@ -6,268 +6,96 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "inventory" - ], - "table:alias": "inventory", - "inputs": [], - "rowCount": 1627857000, - "avgRowSize": 157, - "rowType": { - "fields": [ - { - "type": "BIGINT", - "nullable": false, - "name": "inv_date_sk" - }, - { - "type": "BIGINT", - "nullable": false, - "name": "inv_item_sk" - }, - { - "type": "BIGINT", - "nullable": false, - "name": "inv_warehouse_sk" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "inv_quantity_on_hand" - }, - { - "type": "BIGINT", - "nullable": true, - "name": 
"BLOCK__OFFSET__INSIDE__FILE" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 2147483647, - "name": "INPUT__FILE__NAME" - }, - { - "fields": [ - { - "type": "BIGINT", - "nullable": true, - "name": "writeid" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "bucketid" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "rowid" - } - ], - "nullable": true, - "name": "ROW__ID" - }, - { - "type": "BOOLEAN", - "nullable": true, - "name": "ROW__IS__DELETED" - } - ], - "nullable": false - }, - "colStats": [ - { - "name": "inv_date_sk", - "ndv": 258, - "minValue": 2450815, - "maxValue": 2452635 - }, - { - "name": "inv_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 - }, - { - "name": "inv_warehouse_sk", - "ndv": 27, - "minValue": 1, - "maxValue": 27 - }, - { - "name": "inv_quantity_on_hand", - "ndv": 987, - "minValue": 0, - "maxValue": 1000 - } - ] - }, - { - "id": "1", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", - "fields": [ - "inv_date_sk", - "inv_item_sk", - "inv_warehouse_sk", - "inv_quantity_on_hand" - ], - "exprs": [ - { - "input": 0, - "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, - { - "input": 2, - "name": "$2" - }, - { - "input": 3, - "name": "$3" - } - ], - "rowCount": 1627857000 - }, - { - "id": "2", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", - "table": [ - "default", - "item" + "warehouse" ], - "table:alias": "item", + "table:alias": "warehouse", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + "rowCount": 27, + "avgRowSize": 679, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "w_warehouse_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" - }, - { - "type": "DATE", - "nullable": true, - "name": "i_rec_start_date" - }, - { - "type": "DATE", - "nullable": true, - "name": "i_rec_end_date" + "name": "w_warehouse_id" }, { 
"type": "VARCHAR", "nullable": true, - "precision": 200, - "name": "i_item_desc" - }, - { - "type": "DECIMAL", - "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" - }, - { - "type": "DECIMAL", - "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "i_brand_id" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 50, - "name": "i_brand" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "i_class_id" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 50, - "name": "i_class" + "precision": 20, + "name": "w_warehouse_name" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "w_warehouse_sq_ft" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_category" + "precision": 10, + "name": "w_street_number" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "i_manufact_id" + "precision": 60, + "name": "w_street_name" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_manufact" + "precision": 15, + "name": "w_street_type" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_size" + "precision": 10, + "name": "w_suite_number" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 60, + "name": "w_city" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 20, - "name": "i_color" + "precision": 30, + "name": "w_county" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 2, + "name": "w_state" }, { "type": "CHAR", "nullable": true, "precision": 10, - "name": "i_container" + "name": "w_zip" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "i_manager_id" + "precision": 20, + "name": "w_country" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 50, - 
"name": "i_product_name" + "precision": 5, + "scale": 2, + "name": "w_gmt_offset" }, { "type": "BIGINT", @@ -311,164 +139,196 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, + "name": "w_warehouse_sk", + "ndv": 27, "minValue": 1, - "maxValue": 462000 - }, - { - "name": "i_item_id", - "ndv": 247524 - }, - { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 - }, - { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - "maxValue": 11622 - }, - { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 - }, - { - "name": "i_item_desc", - "ndv": 341846 - }, - { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "maxValue": 27 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "w_warehouse_name", + "ndv": 27 }, { - "name": "i_brand", - "ndv": 742 + "name": "w_warehouse_id", + "ndv": 27 }, { - "name": "i_class_id", - "ndv": 16, - "minValue": 1, - "maxValue": 16 + "name": "w_warehouse_sq_ft", + "ndv": 26, + "minValue": 73065, + "maxValue": 977787 }, { - "name": "i_class", - "ndv": 99 + "name": "w_street_number", + "ndv": 26 }, { - "name": "i_category_id", - "ndv": 10, - "minValue": 1, - "maxValue": 10 + "name": "w_street_name", + "ndv": 27 }, { - "name": "i_category", - "ndv": 11 + "name": "w_street_type", + "ndv": 16 }, { - "name": "i_manufact_id", - "ndv": 987, - "minValue": 1, - "maxValue": 1000 + "name": "w_suite_number", + "ndv": 21 }, { - "name": "i_manufact", - "ndv": 1004 + "name": "w_city", + "ndv": 18 }, { - "name": "i_size", - "ndv": 8 + "name": "w_county", + "ndv": 14 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "w_state", + "ndv": 12 }, { - "name": "i_color", - "ndv": 95 + "name": "w_zip", + "ndv": 24 }, { - "name": "i_units", - "ndv": 21 + "name": "w_country", + "ndv": 1 }, { - "name": "i_container", - "ndv": 2 - }, + "name": "w_gmt_offset", + "ndv": 4, + "minValue": -8, + 
"maxValue": -5 + } + ] + }, + { + "id": "1", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", + "fields": [ + "w_warehouse_sk", + "w_warehouse_name" + ], + "exprs": [ { - "name": "i_manager_id", - "ndv": 104, - "minValue": 1, - "maxValue": 100 + "input": 0, + "name": "$0" }, { - "name": "i_product_name", - "ndv": 461487 + "input": 2, + "name": "$2" } - ] + ], + "rowCount": 27 }, { - "id": "3", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" - }, - "operands": [ + "id": "2", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", + "table": [ + "default", + "inventory" + ], + "table:alias": "inventory", + "inputs": [], + "rowCount": 1627857000, + "avgRowSize": 157, + "rowType": { + "fields": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } + "type": "BIGINT", + "nullable": false, + "name": "inv_date_sk" }, { - "input": 5, - "name": "$5" + "type": "BIGINT", + "nullable": false, + "name": "inv_item_sk" }, { - "literal": 0.99, - "type": { - "type": "DECIMAL", - "nullable": false, - "precision": 3, - "scale": 2 - } + "type": "BIGINT", + "nullable": false, + "name": "inv_warehouse_sk" }, { - "literal": 1.49, - "type": { - "type": "DECIMAL", - "nullable": false, - "precision": 3, - "scale": 2 - } + "type": "INTEGER", + "nullable": true, + "name": "inv_quantity_on_hand" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "BLOCK__OFFSET__INSIDE__FILE" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 2147483647, + "name": "INPUT__FILE__NAME" + }, + { + "fields": [ + { + "type": "BIGINT", + "nullable": true, + "name": "writeid" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "bucketid" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "rowid" + } + ], + "nullable": true, + "name": "ROW__ID" + }, + { + "type": 
"BOOLEAN", + "nullable": true, + "name": "ROW__IS__DELETED" } - ] + ], + "nullable": false }, - "rowCount": 115500 + "colStats": [ + { + "name": "inv_date_sk", + "ndv": 258, + "minValue": 2450815, + "maxValue": 2452635 + }, + { + "name": "inv_item_sk", + "ndv": 464811, + "minValue": 1, + "maxValue": 462000 + }, + { + "name": "inv_warehouse_sk", + "ndv": 27, + "minValue": 1, + "maxValue": 27 + }, + { + "name": "inv_quantity_on_hand", + "ndv": 987, + "minValue": 0, + "maxValue": 1000 + } + ] }, { - "id": "4", + "id": "3", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk", - "i_item_id" + "inv_date_sk", + "inv_item_sk", + "inv_warehouse_sk", + "inv_quantity_on_hand" ], "exprs": [ { @@ -478,41 +338,20 @@ { "input": 1, "name": "$1" - } - ], - "rowCount": 115500 - }, - { - "id": "5", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" }, - "operands": [ - { - "input": 4, - "name": "$4" - }, - { - "input": 1, - "name": "$1" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "1", - "4" + { + "input": 2, + "name": "$2" + }, + { + "input": 3, + "name": "$3" + } ], - "rowCount": 28202622525000 + "rowCount": 1627857000 }, { - "id": "6", + "id": "4", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", @@ -724,7 +563,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -865,7 +704,7 @@ ] }, { - "id": "7", + "id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -920,7 +759,7 @@ "rowCount": 18262.25 }, { - "id": "8", + "id": "6", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_date_sk", @@ -976,7 +815,7 @@ "rowCount": 18262.25 }, { - "id": "9", + "id": "7", 
"relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", "condition": { "op": { @@ -990,8 +829,8 @@ "name": "$0" }, { - "input": 6, - "name": "$6" + "input": 4, + "name": "$4" } ] }, @@ -999,106 +838,149 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "5", - "8" + "3", + "6" ], - "rowCount": 77256501481077184 + "rowCount": 4.4592497247375E12 }, { - "id": "10", + "id": "8", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "warehouse" + "item" ], - "table:alias": "warehouse", + "table:alias": "item", "inputs": [], - "rowCount": 27, - "avgRowSize": 679, + "rowCount": 462000, + "avgRowSize": 1033, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "w_warehouse_sk" + "name": "i_item_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "w_warehouse_id" + "name": "i_item_id" + }, + { + "type": "DATE", + "nullable": true, + "name": "i_rec_start_date" + }, + { + "type": "DATE", + "nullable": true, + "name": "i_rec_end_date" }, { "type": "VARCHAR", "nullable": true, - "precision": 20, - "name": "w_warehouse_name" + "precision": 200, + "name": "i_item_desc" + }, + { + "type": "DECIMAL", + "nullable": true, + "precision": 7, + "scale": 2, + "name": "i_current_price" + }, + { + "type": "DECIMAL", + "nullable": true, + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "i_brand_id" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 50, + "name": "i_brand" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "i_class_id" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 50, + "name": "i_class" }, { "type": "INTEGER", "nullable": true, - "name": "w_warehouse_sq_ft" + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "w_street_number" + "precision": 50, + "name": "i_category" }, { - "type": "VARCHAR", 
+ "type": "INTEGER", "nullable": true, - "precision": 60, - "name": "w_street_name" + "name": "i_manufact_id" }, { "type": "CHAR", "nullable": true, - "precision": 15, - "name": "w_street_type" + "precision": 50, + "name": "i_manufact" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "w_suite_number" + "precision": 20, + "name": "i_size" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 60, - "name": "w_city" + "precision": 20, + "name": "i_formulation" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 30, - "name": "w_county" + "precision": 20, + "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 2, - "name": "w_state" + "precision": 10, + "name": "i_units" }, { "type": "CHAR", "nullable": true, "precision": 10, - "name": "w_zip" + "name": "i_container" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 20, - "name": "w_country" + "name": "i_manager_id" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 5, - "scale": 2, - "name": "w_gmt_offset" + "precision": 50, + "name": "i_product_name" }, { "type": "BIGINT", @@ -1142,75 +1024,164 @@ }, "colStats": [ { - "name": "w_warehouse_sk", - "ndv": 27, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 27 + "maxValue": 462000 }, { - "name": "w_warehouse_name", - "ndv": 27 + "name": "i_item_id", + "ndv": 247524 }, { - "name": "w_warehouse_id", - "ndv": 27 + "name": "i_current_price", + "ndv": 9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "w_warehouse_sq_ft", - "ndv": 26, - "minValue": 73065, - "maxValue": 977787 + "name": "i_rec_start_date", + "ndv": 4, + "minValue": 10161, + "maxValue": 11622 }, { - "name": "w_street_number", - "ndv": 26 + "name": "i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "w_street_name", - "ndv": 27 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "w_street_type", - "ndv": 
16 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { - "name": "w_suite_number", - "ndv": 21 + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 }, { - "name": "w_city", - "ndv": 18 + "name": "i_brand", + "ndv": 742 }, { - "name": "w_county", - "ndv": 14 + "name": "i_class_id", + "ndv": 16, + "minValue": 1, + "maxValue": 16 }, { - "name": "w_state", - "ndv": 12 + "name": "i_class", + "ndv": 99 }, { - "name": "w_zip", - "ndv": 24 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "w_country", - "ndv": 1 + "name": "i_category", + "ndv": 11 }, { - "name": "w_gmt_offset", - "ndv": 4, - "minValue": -8, - "maxValue": -5 + "name": "i_manufact_id", + "ndv": 987, + "minValue": 1, + "maxValue": 1000 + }, + { + "name": "i_manufact", + "ndv": 1004 + }, + { + "name": "i_size", + "ndv": 8 + }, + { + "name": "i_formulation", + "ndv": 344236 + }, + { + "name": "i_color", + "ndv": 95 + }, + { + "name": "i_units", + "ndv": 21 + }, + { + "name": "i_container", + "ndv": 2 + }, + { + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, + "maxValue": 100 + }, + { + "name": "i_product_name", + "ndv": 461487 } ] }, { - "id": "11", + "id": "9", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + { + "input": 5, + "name": "$5" + }, + { + "literal": 0.99, + "type": { + "type": "DECIMAL", + "nullable": false, + "precision": 3, + "scale": 2 + } + }, + { + "literal": 1.49, + "type": { + "type": "DECIMAL", + "nullable": false, + "precision": 3, + "scale": 2 + } + } + ] + }, + "rowCount": 115500 + }, + { + "id": "10", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "w_warehouse_sk", - "w_warehouse_name" + 
"i_item_sk", + "i_item_id" ], "exprs": [ { @@ -1218,11 +1189,40 @@ "name": "$0" }, { - "input": 2, - "name": "$2" + "input": 1, + "name": "$1" } ], - "rowCount": 27 + "rowCount": 115500 + }, + { + "id": "11", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 7, + "name": "$7" + }, + { + "input": 1, + "name": "$1" + } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "7", + "10" + ], + "rowCount": 77256501481077184 }, { "id": "12", @@ -1235,12 +1235,12 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 4, + "name": "$4" }, { - "input": 9, - "name": "$9" + "input": 0, + "name": "$0" } ] }, @@ -1248,7 +1248,7 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "9", + "1", "11" ], "rowCount": 312888830998362560 @@ -1264,12 +1264,12 @@ ], "exprs": [ { - "input": 10, - "name": "$10" + "input": 1, + "name": "$1" }, { - "input": 5, - "name": "$5" + "input": 10, + "name": "$10" }, { "op": { @@ -1283,8 +1283,8 @@ "name": "$7" }, { - "input": 3, - "name": "$3" + "input": 5, + "name": "$5" }, { "literal": 0, @@ -1307,8 +1307,8 @@ "name": "$8" }, { - "input": 3, - "name": "$3" + "input": 5, + "name": "$5" }, { "literal": 0, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query22.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query22.q.out index 19a93afa895f..fec38e99d5ff 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query22.q.out @@ -348,7 +348,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -768,13 +768,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, 
"maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query23.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query23.q.out index 04100aaf066a..1b59a3b927b6 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query23.q.out @@ -752,7 +752,7 @@ Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 7' is a cross produc }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1621,7 +1621,7 @@ Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 7' is a cross produc }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2096,13 +2096,13 @@ Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 7' is a cross produc }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -3460,7 +3460,7 @@ Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 7' is a cross produc }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query24.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query24.q.out index b4f83a2da59e..884aa7a0d68a 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query24.q.out @@ -1578,13 +1578,13 @@ Warning: Map Join MAPJOIN[331][bigTable=?] in task 'Reducer 7' is a cross produc }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -2193,13 +2193,13 @@ Warning: Map Join MAPJOIN[331][bigTable=?] 
in task 'Reducer 7' is a cross produc }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query25.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query25.q.out index a177d91140c4..98b4db0d7f36 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query25.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query25.q.out @@ -762,7 +762,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1654,7 +1654,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2475,7 +2475,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -3187,13 +3187,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -3560,13 +3560,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query26.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query26.q.out index 1bba605f784f..4cfadf51792b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query26.q.out @@ -789,7 +789,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1803,13 +1803,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 
}, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query27.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query27.q.out index 050a2012feb4..2633f9bec9b2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query27.q.out @@ -662,7 +662,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1368,13 +1368,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -1798,13 +1798,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query29.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query29.q.out index 0747e7fe790e..585e7a2c4c78 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query29.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query29.q.out @@ -756,7 +756,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1625,7 +1625,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2446,7 +2446,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -3158,13 +3158,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -3531,13 +3531,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff 
--git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query30.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query30.q.out index ae9c18e7e349..0adf8bfb0cd6 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query30.q.out @@ -1289,7 +1289,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query31.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query31.q.out index acd014f83e2d..f9fe6c674bb4 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query31.q.out @@ -984,7 +984,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query32.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query32.q.out index 42bff2ff3760..33a52bca4055 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query32.q.out @@ -533,139 +533,164 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "item" + "date_dim" ], - "table:alias": "item", + "table:alias": "date_dim", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "i_rec_start_date" + "name": "d_date" }, { - "type": "DATE", + "type": "INTEGER", "nullable": true, - "name": "i_rec_end_date" + "name": "d_month_seq" }, { - "type": 
"VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 200, - "name": "i_item_desc" + "name": "d_week_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" + "name": "d_quarter_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" + "name": "d_year" }, { "type": "INTEGER", "nullable": true, - "name": "i_brand_id" + "name": "d_dow" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_brand" + "name": "d_moy" }, { "type": "INTEGER", "nullable": true, - "name": "i_class_id" + "name": "d_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_class" + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "d_fy_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_category" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "i_manufact_id" + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_manufact" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_size" + "precision": 6, + "name": "d_quarter_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_color" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "i_container" + "name": "d_first_dom" }, { "type": "INTEGER", "nullable": true, - "name": "i_manager_id" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + 
"nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_product_name" + "precision": 1, + "name": "d_current_day" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_week" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_month" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_quarter" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -709,145 +734,213 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 }, { - "name": "i_manufact_id", - "ndv": 987, - "minValue": 1, - "maxValue": 1000 + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 }, { - "name": "i_item_id", - "ndv": 247524 + "name": "d_date_id", + "ndv": 71022 }, { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - "maxValue": 11622 + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 }, { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 + "name": "d_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_item_desc", - "ndv": 341846 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + 
"maxValue": 12 }, { - "name": "i_brand", - "ndv": 742 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 }, { - "name": "i_class_id", - "ndv": 16, + "name": "d_qoy", + "ndv": 4, "minValue": 1, - "maxValue": 16 + "maxValue": 4 }, { - "name": "i_class", - "ndv": 99 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_category_id", - "ndv": 10, + "name": "d_fy_quarter_seq", + "ndv": 808, "minValue": 1, - "maxValue": 10 + "maxValue": 801 }, { - "name": "i_category", - "ndv": 11 + "name": "d_fy_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_manufact", - "ndv": 1004 + "name": "d_day_name", + "ndv": 7 }, { - "name": "i_size", - "ndv": 8 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "d_holiday", + "ndv": 2 }, { - "name": "i_color", - "ndv": 95 + "name": "d_weekend", + "ndv": 2 }, { - "name": "i_units", - "ndv": 21 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "i_container", - "ndv": 2 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "i_manager_id", - "ndv": 104, - "minValue": 1, - "maxValue": 100 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "i_product_name", - "ndv": 461487 - } - ] - }, - { + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 + }, + { + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 + }, + { + "name": "d_current_day", + "ndv": 1 + }, + { + "name": "d_current_week", + "ndv": 1 + }, + { + "name": "d_current_month", + "ndv": 2 + }, + { + "name": "d_current_quarter", + "ndv": 2 + }, + { + "name": "d_current_year", + "ndv": 2 + } + ] + }, + { "id": "4", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" + "name": "BETWEEN", + 
"kind": "BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "input": 13, - "name": "$13" + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "literal": 269, + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "INTEGER", - "nullable": false + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 890179200000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } + }, + { + "literal": 897955200000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 } } ] }, - "rowCount": 69300 + "rowCount": 18262.25 }, { "id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk" + "d_date_sk" ], "exprs": [ { @@ -855,7 +948,7 @@ "name": "$0" } ], - "rowCount": 69300 + "rowCount": 18262.25 }, { "id": "6", @@ -872,8 +965,8 @@ "name": "$3" }, { - "input": 0, - "name": "$0" + "input": 2, + "name": "$2" } ] }, @@ -884,171 +977,146 @@ "2", "5" ], - "rowCount": 3.6210086773504875E14 + "rowCount": 9.542246135345445E13 }, { "id": "7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "item" ], - "table:alias": "date_dim", + "table:alias": "item", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 462000, + "avgRowSize": 1033, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "i_item_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "i_item_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_month_seq" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_week_seq" + "name": "i_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": 
"d_quarter_seq" + "name": "i_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_year" + "precision": 200, + "name": "i_item_desc" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_dow" + "precision": 7, + "scale": 2, + "name": "i_current_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_moy" + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" }, { "type": "INTEGER", "nullable": true, - "name": "d_dom" + "name": "i_brand_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_qoy" + "precision": 50, + "name": "i_brand" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_year" + "name": "i_class_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 50, + "name": "i_class" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "i_category" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "i_manufact_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "i_manufact" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 20, + "name": "i_size" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_first_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_last_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_ly" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_lq" + "precision": 20, + "name": "i_formulation" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 20, 
+ "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "i_units" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 10, + "name": "i_container" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "name": "i_manager_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 50, + "name": "i_product_name" }, { "type": "BIGINT", @@ -1092,150 +1160,112 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 71022 - }, - { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 - }, - { - "name": "d_week_seq", - "ndv": 11297, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 10436 + "maxValue": 462000 }, { - "name": "d_quarter_seq", - "ndv": 808, + "name": "i_manufact_id", + "ndv": 987, "minValue": 1, - "maxValue": 801 - }, - { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 - }, - { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "maxValue": 1000 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "i_item_id", + "ndv": 247524 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "i_rec_start_date", + "ndv": 4, + "minValue": 10161, + "maxValue": 11622 }, { - "name": "d_qoy", - "ndv": 4, - "minValue": 1, - "maxValue": 4 + "name": "i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "i_current_price", + "ndv": 
9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "d_fy_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { - "name": "d_day_name", - "ndv": 7 + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "i_brand", + "ndv": 742 }, { - "name": "d_holiday", - "ndv": 2 + "name": "i_class_id", + "ndv": 16, + "minValue": 1, + "maxValue": 16 }, { - "name": "d_weekend", - "ndv": 2 + "name": "i_class", + "ndv": 99 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "i_category", + "ndv": 11 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "i_manufact", + "ndv": 1004 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "i_size", + "ndv": 8 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "i_formulation", + "ndv": 344236 }, { - "name": "d_current_day", - "ndv": 1 + "name": "i_color", + "ndv": 95 }, { - "name": "d_current_week", - "ndv": 1 + "name": "i_units", + "ndv": 21 }, { - "name": "d_current_month", + "name": "i_container", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, + "maxValue": 100 }, { - "name": "d_current_year", - "ndv": 2 + "name": "i_product_name", + "ndv": 461487 } ] }, @@ -1244,61 +1274,31 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - 
"nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 890179200000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } + "input": 13, + "name": "$13" }, { - "literal": 897955200000, + "literal": 269, "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 + "type": "INTEGER", + "nullable": false } } ] }, - "rowCount": 18262.25 + "rowCount": 69300 }, { "id": "9", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "i_item_sk" ], "exprs": [ { @@ -1306,7 +1306,7 @@ "name": "$0" } ], - "rowCount": 18262.25 + "rowCount": 69300 }, { "id": "10", @@ -1323,8 +1323,8 @@ "name": "$4" }, { - "input": 2, - "name": "$2" + "input": 0, + "name": "$0" } ] }, @@ -1436,7 +1436,7 @@ ] }, "inputs": [ - "7" + "3" ], "rowCount": 18262.25 }, @@ -1660,8 +1660,8 @@ "name": "$6" }, { - "input": 3, - "name": "$3" + "input": 4, + "name": "$4" } ] }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query33.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query33.q.out index bbc3b0a26663..6c17e30b04a5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query33.q.out @@ -635,7 +635,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1385,13 +1385,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -1731,13 +1731,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, 
"maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query34.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query34.q.out index 6d95c81d3c67..c5099314aaa3 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query34.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query34.q.out @@ -946,7 +946,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1786,13 +1786,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query35.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query35.q.out index 0c60c15871d2..f617a04ea056 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query35.q.out @@ -1125,7 +1125,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query36.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query36.q.out index 1e9851215e16..13619d080ef8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query36.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query36.q.out @@ -634,7 +634,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1069,13 +1069,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -1518,13 +1518,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 
10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query37.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query37.q.out index 9c2e95830475..f7d0a64c69f9 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query37.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query37.q.out @@ -483,164 +483,297 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "inventory" ], - "table:alias": "date_dim", + "table:alias": "inventory", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 1627857000, + "avgRowSize": 157, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "inv_date_sk" }, { - "type": "VARCHAR", + "type": "BIGINT", "nullable": false, - "precision": 2147483647, - "name": "d_date_id" + "name": "inv_item_sk" }, { - "type": "DATE", - "nullable": true, - "name": "d_date" + "type": "BIGINT", + "nullable": false, + "name": "inv_warehouse_sk" }, { "type": "INTEGER", "nullable": true, - "name": "d_month_seq" + "name": "inv_quantity_on_hand" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_week_seq" + "name": "BLOCK__OFFSET__INSIDE__FILE" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_quarter_seq" + "precision": 2147483647, + "name": "INPUT__FILE__NAME" }, { - "type": "INTEGER", + "fields": [ + { + "type": "BIGINT", + "nullable": true, + "name": "writeid" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "bucketid" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "rowid" + } + ], "nullable": true, - "name": "d_year" + "name": "ROW__ID" }, { - "type": "INTEGER", + "type": "BOOLEAN", "nullable": true, - "name": "d_dow" + "name": "ROW__IS__DELETED" + } + ], + "nullable": false + }, + "colStats": [ + { + "name": "inv_date_sk", + "ndv": 258, + "minValue": 2450815, + "maxValue": 
2452635 + }, + { + "name": "inv_item_sk", + "ndv": 464811, + "minValue": 1, + "maxValue": 462000 + }, + { + "name": "inv_quantity_on_hand", + "ndv": 987, + "minValue": 0, + "maxValue": 1000 + }, + { + "name": "inv_warehouse_sk", + "ndv": 27, + "minValue": 1, + "maxValue": 27 + } + ] + }, + { + "id": "3", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "type": "INTEGER", + "input": 3, + "name": "$3" + }, + { + "literal": 100, + "type": { + "type": "INTEGER", + "nullable": false + } + }, + { + "literal": 500, + "type": { + "type": "INTEGER", + "nullable": false + } + } + ] + }, + "rowCount": 406964250 + }, + { + "id": "4", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", + "fields": [ + "inv_date_sk", + "inv_item_sk" + ], + "exprs": [ + { + "input": 0, + "name": "$0" + }, + { + "input": 1, + "name": "$1" + } + ], + "rowCount": 406964250 + }, + { + "id": "5", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", + "table": [ + "default", + "item" + ], + "table:alias": "item", + "inputs": [], + "rowCount": 462000, + "avgRowSize": 1033, + "rowType": { + "fields": [ + { + "type": "BIGINT", + "nullable": false, + "name": "i_item_sk" + }, + { + "type": "VARCHAR", + "nullable": false, + "precision": 2147483647, + "name": "i_item_id" + }, + { + "type": "DATE", "nullable": true, - "name": "d_moy" + "name": "i_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_dom" + "name": "i_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_qoy" + "precision": 200, + "name": "i_item_desc" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_fy_year" + "precision": 7, + "scale": 2, + "name": 
"i_current_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "i_brand_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "i_brand" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "i_class_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "i_class" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_weekend" + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" + "precision": 50, + "name": "i_category" }, { "type": "INTEGER", "nullable": true, - "name": "d_first_dom" + "name": "i_manufact_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_last_dom" + "precision": 50, + "name": "i_manufact" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_same_day_ly" + "precision": 20, + "name": "i_size" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_same_day_lq" + "precision": 20, + "name": "i_formulation" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 20, + "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "i_units" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 10, + "name": "i_container" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "name": "i_manager_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 50, + "name": 
"i_product_name" }, { "type": "BIGINT", @@ -684,367 +817,216 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 71022 - }, - { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 - }, - { - "name": "d_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 - }, - { - "name": "d_quarter_seq", - "ndv": 808, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 801 + "maxValue": 462000 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "i_item_id", + "ndv": 247524 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "i_current_price", + "ndv": 9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "d_dom", - "ndv": 31, + "name": "i_manufact_id", + "ndv": 987, "minValue": 1, - "maxValue": 31 + "maxValue": 1000 }, { - "name": "d_qoy", + "name": "i_rec_start_date", "ndv": 4, - "minValue": 1, - "maxValue": 4 - }, - { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "minValue": 10161, + "maxValue": 11622 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "d_fy_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { - "name": "d_day_name", - "ndv": 7 + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "i_brand", + "ndv": 742 }, { - "name": "d_holiday", - "ndv": 2 + "name": "i_class_id", + "ndv": 16, + "minValue": 1, + "maxValue": 16 }, { 
- "name": "d_weekend", - "ndv": 2 + "name": "i_class", + "ndv": 99 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "i_category", + "ndv": 11 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "i_manufact", + "ndv": 1004 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "i_size", + "ndv": 8 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "i_formulation", + "ndv": 344236 }, { - "name": "d_current_day", - "ndv": 1 + "name": "i_color", + "ndv": 95 }, { - "name": "d_current_week", - "ndv": 1 + "name": "i_units", + "ndv": 21 }, { - "name": "d_current_month", + "name": "i_container", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, + "maxValue": 100 }, { - "name": "d_current_year", - "ndv": 2 + "name": "i_product_name", + "ndv": 461487 } ] }, { - "id": "3", + "id": "6", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "AND", + "kind": "AND", + "syntax": "BINARY" }, "operands": [ - { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, { "op": { - "name": "CAST", - "kind": "CAST", + "name": "IN", + "kind": "OTHER_FUNCTION", "syntax": "SPECIAL" }, "operands": [ { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 991440000000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - }, - { - "literal": 996624000000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - } - ] - }, - "rowCount": 18262.25 
- }, - { - "id": "4", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", - "fields": [ - "d_date_sk" - ], - "exprs": [ - { - "input": 0, - "name": "$0" - } - ], - "rowCount": 18262.25 - }, - { - "id": "5", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", - "table": [ - "default", - "inventory" - ], - "table:alias": "inventory", - "inputs": [], - "rowCount": 1627857000, - "avgRowSize": 157, - "rowType": { - "fields": [ - { - "type": "BIGINT", - "nullable": false, - "name": "inv_date_sk" - }, - { - "type": "BIGINT", - "nullable": false, - "name": "inv_item_sk" - }, - { - "type": "BIGINT", - "nullable": false, - "name": "inv_warehouse_sk" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "inv_quantity_on_hand" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "BLOCK__OFFSET__INSIDE__FILE" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 2147483647, - "name": "INPUT__FILE__NAME" - }, - { - "fields": [ + "input": 13, + "name": "$13" + }, { - "type": "BIGINT", - "nullable": true, - "name": "writeid" + "literal": 678, + "type": { + "type": "INTEGER", + "nullable": false + } }, { - "type": "INTEGER", - "nullable": true, - "name": "bucketid" + "literal": 849, + "type": { + "type": "INTEGER", + "nullable": false + } }, { - "type": "BIGINT", - "nullable": true, - "name": "rowid" + "literal": 918, + "type": { + "type": "INTEGER", + "nullable": false + } + }, + { + "literal": 964, + "type": { + "type": "INTEGER", + "nullable": false + } } - ], - "nullable": true, - "name": "ROW__ID" - }, - { - "type": "BOOLEAN", - "nullable": true, - "name": "ROW__IS__DELETED" - } - ], - "nullable": false - }, - "colStats": [ - { - "name": "inv_date_sk", - "ndv": 258, - "minValue": 2450815, - "maxValue": 2452635 - }, - { - "name": "inv_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 - }, - { - "name": "inv_quantity_on_hand", - "ndv": 987, - "minValue": 0, - "maxValue": 1000 - 
}, - { - "name": "inv_warehouse_sk", - "ndv": 27, - "minValue": 1, - "maxValue": 27 - } - ] - }, - { - "id": "6", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" - }, - "operands": [ - { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "input": 3, - "name": "$3" - }, - { - "literal": 100, - "type": { - "type": "INTEGER", - "nullable": false - } + ] }, { - "literal": 500, - "type": { - "type": "INTEGER", - "nullable": false - } + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + { + "input": 5, + "name": "$5" + }, + { + "literal": 22, + "type": { + "type": "DECIMAL", + "nullable": false, + "precision": 12, + "scale": 2 + } + }, + { + "literal": 52, + "type": { + "type": "DECIMAL", + "nullable": false, + "precision": 12, + "scale": 2 + } + } + ] } ] }, - "rowCount": 406964250 + "rowCount": 28875 }, { "id": "7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "inv_date_sk", - "inv_item_sk" + "i_item_sk", + "i_item_id", + "i_item_desc", + "i_current_price" ], "exprs": [ { @@ -1054,148 +1036,210 @@ { "input": 1, "name": "$1" + }, + { + "input": 4, + "name": "$4" + }, + { + "input": 5, + "name": "$5" } ], - "rowCount": 406964250 + "rowCount": 28875 }, { "id": "8", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 1, + "name": "$1" + }, + { + "input": 2, + "name": "$2" + } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "4", + "7" + ], + "rowCount": 1.7626639078125E12 + }, + { + "id": "9", "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "item" + "date_dim" ], - "table:alias": "item", + "table:alias": "date_dim", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "i_rec_start_date" + "name": "d_date" }, { - "type": "DATE", + "type": "INTEGER", "nullable": true, - "name": "i_rec_end_date" + "name": "d_month_seq" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 200, - "name": "i_item_desc" + "name": "d_week_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" + "name": "d_quarter_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" + "name": "d_year" }, { "type": "INTEGER", "nullable": true, - "name": "i_brand_id" + "name": "d_dow" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_brand" + "name": "d_moy" }, { "type": "INTEGER", "nullable": true, - "name": "i_class_id" + "name": "d_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_class" + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "d_fy_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_category" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "i_manufact_id" + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_manufact" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": 
true, - "precision": 20, - "name": "i_size" + "precision": 6, + "name": "d_quarter_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_color" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "i_container" + "name": "d_first_dom" }, { "type": "INTEGER", "nullable": true, - "name": "i_manager_id" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_product_name" + "precision": 1, + "name": "d_current_day" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_week" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_month" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_quarter" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -1239,239 +1283,224 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 + }, + { + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 + }, + { + "name": "d_date_id", + "ndv": 71022 + }, + { + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 + }, + { + "name": "d_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 462000 + "maxValue": 10436 }, { - "name": "i_item_id", - "ndv": 247524 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_item_desc", - 
"ndv": 341846 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "i_manufact_id", - "ndv": 987, + "name": "d_moy", + "ndv": 12, "minValue": 1, - "maxValue": 1000 + "maxValue": 12 }, { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - "maxValue": 11622 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 + }, + { + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "name": "d_fy_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "d_fy_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_brand", - "ndv": 742 + "name": "d_day_name", + "ndv": 7 }, { - "name": "i_class_id", - "ndv": 16, - "minValue": 1, - "maxValue": 16 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "i_class", - "ndv": 99 + "name": "d_holiday", + "ndv": 2 }, { - "name": "i_category_id", - "ndv": 10, - "minValue": 1, - "maxValue": 10 + "name": "d_weekend", + "ndv": 2 }, { - "name": "i_category", - "ndv": 11 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "i_manufact", - "ndv": 1004 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "i_size", - "ndv": 8 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 }, { - "name": "i_color", - "ndv": 95 + "name": 
"d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 }, { - "name": "i_units", - "ndv": 21 + "name": "d_current_day", + "ndv": 1 }, { - "name": "i_container", + "name": "d_current_week", + "ndv": 1 + }, + { + "name": "d_current_month", "ndv": 2 }, { - "name": "i_manager_id", - "ndv": 104, - "minValue": 1, - "maxValue": 100 + "name": "d_current_quarter", + "ndv": 2 }, { - "name": "i_product_name", - "ndv": 461487 + "name": "d_current_year", + "ndv": 2 } ] }, { - "id": "9", + "id": "10", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "AND", - "kind": "AND", - "syntax": "BINARY" + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "op": { - "name": "IN", - "kind": "OTHER_FUNCTION", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 13, - "name": "$13" - }, - { - "literal": 678, - "type": { - "type": "INTEGER", - "nullable": false - } - }, - { - "literal": 849, - "type": { - "type": "INTEGER", - "nullable": false - } - }, - { - "literal": 918, - "type": { - "type": "INTEGER", - "nullable": false - } - }, - { - "literal": 964, - "type": { - "type": "INTEGER", - "nullable": false - } - } - ] + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", + "name": "CAST", + "kind": "CAST", "syntax": "SPECIAL" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "input": 5, - "name": "$5" - }, - { - "literal": 22, - "type": { - "type": "DECIMAL", - "nullable": false, - "precision": 12, - "scale": 2 - } - }, - { - "literal": 52, - "type": { - "type": "DECIMAL", - "nullable": false, - "precision": 12, - "scale": 2 - } + "input": 2, + "name": "$2" } - ] + ], + "type": { + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 991440000000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + 
"precision": 9 + } + }, + { + "literal": 996624000000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } } ] }, - "rowCount": 28875 + "rowCount": 18262.25 }, { - "id": "10", + "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk", - "i_item_id", - "i_item_desc", - "i_current_price" + "d_date_sk" ], "exprs": [ { "input": 0, "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, - { - "input": 4, - "name": "$4" - }, - { - "input": 5, - "name": "$5" } ], - "rowCount": 28875 + "rowCount": 18262.25 }, { - "id": "11", + "id": "12", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", "condition": { "op": { @@ -1481,41 +1510,12 @@ }, "operands": [ { - "input": 1, - "name": "$1" + "input": 6, + "name": "$6" }, - { - "input": 2, - "name": "$2" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "7", - "10" - ], - "rowCount": 1.7626639078125E12 - }, - { - "id": "12", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ { "input": 0, "name": "$0" - }, - { - "input": 1, - "name": "$1" } ] }, @@ -1523,7 +1523,7 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "4", + "8", "11" ], "rowCount": 4828531342567324 @@ -1543,8 +1543,8 @@ "name": "$0" }, { - "input": 4, - "name": "$4" + "input": 3, + "name": "$3" } ] }, @@ -1561,9 +1561,9 @@ "id": "14", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ + 4, 5, - 6, - 7 + 6 ], "aggs": [], "rowCount": 3.130399511396205E24 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query38.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query38.q.out index bd9adfee5f0f..2ceb58dd502a 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query38.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query38.q.out @@ -879,7 +879,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query39.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query39.q.out index 4c127cd7fb3e..4933f902e512 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query39.q.out @@ -359,7 +359,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query40.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query40.q.out index c30f19915077..4dd0c6c7eaf6 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query40.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query40.q.out @@ -996,139 +996,164 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "item" + "date_dim" ], - "table:alias": "item", + "table:alias": "date_dim", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "i_rec_start_date" + "name": "d_date" }, { - "type": "DATE", + "type": "INTEGER", "nullable": true, - "name": "i_rec_end_date" + "name": "d_month_seq" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 200, - "name": "i_item_desc" + "name": "d_week_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" + "name": "d_quarter_seq" }, { - 
"type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" + "name": "d_year" }, { "type": "INTEGER", "nullable": true, - "name": "i_brand_id" + "name": "d_dow" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_brand" + "name": "d_moy" }, { "type": "INTEGER", "nullable": true, - "name": "i_class_id" + "name": "d_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_class" + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "d_fy_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_category" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "i_manufact_id" + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_manufact" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_size" + "precision": 6, + "name": "d_quarter_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_color" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "i_container" + "name": "d_first_dom" }, { "type": "INTEGER", "nullable": true, - "name": "i_manager_id" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_product_name" + "precision": 1, + "name": "d_current_day" + }, + { + "type": "CHAR", 
+ "nullable": true, + "precision": 1, + "name": "d_current_week" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_month" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_quarter" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -1172,122 +1197,160 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 - }, - { - "name": "i_item_id", - "ndv": 247524 + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 }, { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 }, { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - "maxValue": 11622 + "name": "d_date_id", + "ndv": 71022 }, { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 }, { - "name": "i_item_desc", - "ndv": 341846 + "name": "d_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_brand", - "ndv": 742 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "i_class_id", - "ndv": 16, + "name": "d_moy", + "ndv": 12, "minValue": 1, - "maxValue": 16 + "maxValue": 12 }, { - "name": "i_class", - "ndv": 99 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 }, { - "name": "i_category_id", - "ndv": 10, + "name": "d_qoy", + "ndv": 4, "minValue": 1, - "maxValue": 10 + "maxValue": 4 }, { - "name": 
"i_category", - "ndv": 11 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_manufact_id", - "ndv": 987, + "name": "d_fy_quarter_seq", + "ndv": 808, "minValue": 1, - "maxValue": 1000 + "maxValue": 801 }, { - "name": "i_manufact", - "ndv": 1004 + "name": "d_fy_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_size", - "ndv": 8 + "name": "d_day_name", + "ndv": 7 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "i_color", - "ndv": 95 + "name": "d_holiday", + "ndv": 2 }, { - "name": "i_units", - "ndv": 21 + "name": "d_weekend", + "ndv": 2 }, { - "name": "i_container", + "name": "d_following_holiday", "ndv": 2 }, { - "name": "i_manager_id", - "ndv": 104, - "minValue": 1, - "maxValue": 100 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "i_product_name", - "ndv": 461487 - } - ] - }, - { - "id": "7", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "BETWEEN", - "kind": "BETWEEN", + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 + }, + { + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 + }, + { + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 + }, + { + "name": "d_current_day", + "ndv": 1 + }, + { + "name": "d_current_week", + "ndv": 1 + }, + { + "name": "d_current_month", + "ndv": 2 + }, + { + "name": "d_current_quarter", + "ndv": 2 + }, + { + "name": "d_current_year", + "ndv": 2 + } + ] + }, + { + "id": "7", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", "syntax": "SPECIAL" }, "operands": [ @@ -1299,37 +1362,50 @@ } }, { - "input": 5, - "name": "$5" + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": 
"SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], + "type": { + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } }, { - "literal": 0.99, + "literal": 889401600000, "type": { - "type": "DECIMAL", + "type": "TIMESTAMP", "nullable": false, - "precision": 3, - "scale": 2 + "precision": 9 } }, { - "literal": 1.49, + "literal": 894585600000, "type": { - "type": "DECIMAL", + "type": "TIMESTAMP", "nullable": false, - "precision": 3, - "scale": 2 + "precision": 9 } } ] }, - "rowCount": 115500 + "rowCount": 18262.25 }, { "id": "8", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk", - "i_item_id" + "d_date_sk", + "EXPR$0", + "EXPR$1" ], "exprs": [ { @@ -1337,11 +1413,47 @@ "name": "$0" }, { - "input": 1, - "name": "$1" + "op": { + "name": "<", + "kind": "LESS_THAN", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 2, + "name": "$2" + }, + { + "literal": 10324, + "type": { + "type": "DATE", + "nullable": false + } + } + ] + }, + { + "op": { + "name": ">=", + "kind": "GREATER_THAN_OR_EQUAL", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 2, + "name": "$2" + }, + { + "literal": 10324, + "type": { + "type": "DATE", + "nullable": false + } + } + ] } ], - "rowCount": 115500 + "rowCount": 18262.25 }, { "id": "9", @@ -1354,12 +1466,12 @@ }, "operands": [ { - "input": 8, - "name": "$8" + "input": 4, + "name": "$4" }, { - "input": 1, - "name": "$1" + "input": 8, + "name": "$8" } ] }, @@ -1370,171 +1482,146 @@ "5", "8" ], - "rowCount": 5.867364971418929E22 + "rowCount": 9.277167614657605E21 }, { "id": "10", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "item" ], - "table:alias": "date_dim", + "table:alias": "item", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 462000, + "avgRowSize": 1033, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + 
"name": "i_item_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "i_item_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_month_seq" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_week_seq" + "name": "i_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_quarter_seq" + "name": "i_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_year" + "precision": 200, + "name": "i_item_desc" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_dow" + "precision": 7, + "scale": 2, + "name": "i_current_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_moy" + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" }, { "type": "INTEGER", "nullable": true, - "name": "d_dom" + "name": "i_brand_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_qoy" + "precision": 50, + "name": "i_brand" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_year" + "name": "i_class_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 50, + "name": "i_class" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "i_category" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "i_manufact_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "i_manufact" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 20, + "name": "i_size" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" - }, - { 
- "type": "INTEGER", - "nullable": true, - "name": "d_first_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_last_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_ly" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_lq" + "precision": 20, + "name": "i_formulation" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 20, + "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "i_units" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 10, + "name": "i_container" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "name": "i_manager_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 50, + "name": "i_product_name" }, { "type": "BIGINT", @@ -1578,150 +1665,112 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 71022 - }, - { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 - }, - { - "name": "d_week_seq", - "ndv": 11297, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 10436 + "maxValue": 462000 }, { - "name": "d_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "i_item_id", + "ndv": 247524 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "i_current_price", + "ndv": 9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "i_rec_start_date", + "ndv": 4, + "minValue": 10161, + "maxValue": 11622 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": 
"i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "d_qoy", - "ndv": 4, - "minValue": 1, - "maxValue": 4 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "i_brand", + "ndv": 742 }, { - "name": "d_fy_week_seq", - "ndv": 11297, + "name": "i_class_id", + "ndv": 16, "minValue": 1, - "maxValue": 10436 - }, - { - "name": "d_day_name", - "ndv": 7 - }, - { - "name": "d_quarter_name", - "ndv": 800 + "maxValue": 16 }, { - "name": "d_holiday", - "ndv": 2 + "name": "i_class", + "ndv": 99 }, { - "name": "d_weekend", - "ndv": 2 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "i_category", + "ndv": 11 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "i_manufact_id", + "ndv": 987, + "minValue": 1, + "maxValue": 1000 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "i_manufact", + "ndv": 1004 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "i_size", + "ndv": 8 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "i_formulation", + "ndv": 344236 }, { - "name": "d_current_day", - "ndv": 1 + "name": "i_color", + "ndv": 95 }, { - "name": "d_current_week", - "ndv": 1 + "name": "i_units", + "ndv": 21 }, { - "name": "d_current_month", + "name": "i_container", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, 
+ "maxValue": 100 }, { - "name": "d_current_year", - "ndv": 2 + "name": "i_product_name", + "ndv": 461487 } ] }, @@ -1743,50 +1792,37 @@ } }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } + "input": 5, + "name": "$5" }, { - "literal": 889401600000, + "literal": 0.99, "type": { - "type": "TIMESTAMP", + "type": "DECIMAL", "nullable": false, - "precision": 9 + "precision": 3, + "scale": 2 } }, { - "literal": 894585600000, + "literal": 1.49, "type": { - "type": "TIMESTAMP", + "type": "DECIMAL", "nullable": false, - "precision": 9 + "precision": 3, + "scale": 2 } } ] }, - "rowCount": 18262.25 + "rowCount": 115500 }, { "id": "12", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk", - "EXPR$0", - "EXPR$1" + "i_item_sk", + "i_item_id" ], "exprs": [ { @@ -1794,47 +1830,11 @@ "name": "$0" }, { - "op": { - "name": "<", - "kind": "LESS_THAN", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 2, - "name": "$2" - }, - { - "literal": 10324, - "type": { - "type": "DATE", - "nullable": false - } - } - ] - }, - { - "op": { - "name": ">=", - "kind": "GREATER_THAN_OR_EQUAL", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 2, - "name": "$2" - }, - { - "literal": 10324, - "type": { - "type": "DATE", - "nullable": false - } - } - ] + "input": 1, + "name": "$1" } ], - "rowCount": 18262.25 + "rowCount": 115500 }, { "id": "13", @@ -1847,12 +1847,12 @@ }, "operands": [ { - "input": 4, - "name": "$4" + "input": 11, + "name": "$11" }, { - "input": 10, - "name": "$10" + "input": 1, + "name": "$1" } ] }, @@ -1863,7 +1863,7 @@ "9", "12" ], - "rowCount": 1.60726928923943E26 + "rowCount": 1.6072692892394298E26 }, { "id": "14", @@ -2112,7 +2112,7 @@ "13", "15" ], - "rowCount": 6.509440621419692E26 + "rowCount": 6.509440621419691E26 }, { "id": "17", @@ -2129,8 +2129,8 
@@ "name": "$14" }, { - "input": 9, - "name": "$9" + "input": 12, + "name": "$12" }, { "op": { @@ -2140,8 +2140,8 @@ }, "operands": [ { - "input": 11, - "name": "$11" + "input": 9, + "name": "$9" }, { "op": { @@ -2210,8 +2210,8 @@ }, "operands": [ { - "input": 12, - "name": "$12" + "input": 10, + "name": "$10" }, { "op": { @@ -2273,7 +2273,7 @@ ] } ], - "rowCount": 6.509440621419692E26 + "rowCount": 6.509440621419691E26 }, { "id": "18", @@ -2320,7 +2320,7 @@ "name": null } ], - "rowCount": 6.509440621419692E25 + "rowCount": 6.50944062141969E25 }, { "id": "19", @@ -2349,7 +2349,7 @@ "name": "$3" } ], - "rowCount": 6.509440621419692E25 + "rowCount": 6.50944062141969E25 }, { "id": "20", diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query41.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query41.q.out index 8bca60c4d564..ee37027b55f2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query41.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query41.q.out @@ -207,13 +207,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -583,13 +583,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query42.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query42.q.out index 0223e837411a..db719a75c7cf 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query42.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query42.q.out @@ -608,7 +608,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1037,13 +1037,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, 
"minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query43.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query43.q.out index f3b51680a9c2..39bad00d4605 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query43.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query43.q.out @@ -628,7 +628,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1219,13 +1219,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query44.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query44.q.out index 7cb719ae2c10..cc12d11eb5b5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query44.q.out @@ -198,13 +198,13 @@ Warning: Map Join MAPJOIN[112][bigTable=?] in task 'Reducer 2' is a cross produc }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -2261,13 +2261,13 @@ Warning: Map Join MAPJOIN[112][bigTable=?] 
in task 'Reducer 2' is a cross produc }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query45.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query45.q.out index 499659b88d38..69f67c983ba2 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query45.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query45.q.out @@ -1288,7 +1288,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1816,13 +1816,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query46.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query46.q.out index 6dd5d935941d..b1ac9339a1e7 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query46.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query46.q.out @@ -1434,7 +1434,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1913,13 +1913,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query47.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query47.q.out index a9d9f441a827..a1d5d7542c1a 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query47.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query47.q.out @@ -635,7 +635,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, 
"maxValue": 47482 }, @@ -1202,13 +1202,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -1615,13 +1615,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query48.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query48.q.out index 327aeae24b63..b5c6c6dad3b6 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query48.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query48.q.out @@ -1482,7 +1482,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query49.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query49.q.out index eba8caaac781..ccaef181a9da 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query49.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query49.q.out @@ -1220,7 +1220,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query50.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query50.q.out index b3628b962db8..65724838bad1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query50.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query50.q.out @@ -1019,7 +1019,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1587,13 +1587,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, 
"minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query51.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query51.q.out index 0bdac052f316..b7ea63cd9043 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query51.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query51.q.out @@ -592,7 +592,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query52.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query52.q.out index 416252d7288c..e20b050f8e7d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query52.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query52.q.out @@ -608,7 +608,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1037,13 +1037,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query53.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query53.q.out index 1b1b008d3785..deeb7d2d0a79 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query53.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query53.q.out @@ -611,13 +611,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -1478,7 +1478,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query54.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query54.q.out index 
3821417ab4ad..78b5ea15ceb4 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query54.q.out @@ -640,7 +640,7 @@ Warning: Map Join MAPJOIN[286][bigTable=?] in task 'Map 1' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1658,13 +1658,13 @@ Warning: Map Join MAPJOIN[286][bigTable=?] in task 'Map 1' is a cross product }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -3480,7 +3480,7 @@ Warning: Map Join MAPJOIN[286][bigTable=?] in task 'Map 1' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -3913,13 +3913,13 @@ Warning: Map Join MAPJOIN[286][bigTable=?] in task 'Map 1' is a cross product }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -4481,7 +4481,7 @@ Warning: Map Join MAPJOIN[286][bigTable=?] 
in task 'Map 1' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query55.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query55.q.out index c16944da8961..efcd9d4a4ad0 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query55.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query55.q.out @@ -608,7 +608,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1037,13 +1037,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query56.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query56.q.out index b9950df6733b..a256e47e96fc 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query56.q.out @@ -635,7 +635,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1375,13 +1375,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -1717,13 +1717,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query57.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query57.q.out index 691a9a0ba6fb..d145fb626734 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query57.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query57.q.out @@ -762,7 +762,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1336,13 +1336,13 @@ }, { "name": "cc_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10227, "maxValue": 11688 }, { "name": "cc_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10957, "maxValue": 11687 }, @@ -1736,13 +1736,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out index 1cb25790654e..ff9ac3ce98d7 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query58.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[380][bigTable=?] in task 'Reducer 6' is a cross product { "CBOPlan": { "rels": [ @@ -720,7 +721,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1143,7 +1144,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1555,7 +1556,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1793,25 +1794,6 @@ Warning: Map Join MAPJOIN[375][bigTable=?] 
in task 'Map 5' is a cross product }, { "id": "16", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "9", - "15" - ], - "rowCount": 59169.69 - }, - { - "id": "17", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -1861,7 +1843,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "rowCount": 9861.615 }, { - "id": "18", + "id": "17", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -1874,6 +1856,25 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product ], "rowCount": 9861.615 }, + { + "id": "18", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "15", + "17" + ], + "rowCount": 9861.615 + }, { "id": "19", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -1898,7 +1899,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "algorithm": "none", "cost": "not available", "inputs": [ - "16", + "9", "18" ], "rowCount": 8.75263053674025E7 @@ -2136,13 +2137,13 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -3116,25 +3117,6 @@ Warning: Map Join MAPJOIN[375][bigTable=?] 
in task 'Map 5' is a cross product }, { "id": "40", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "34", - "39" - ], - "rowCount": 59169.69 - }, - { - "id": "41", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -3184,7 +3166,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "rowCount": 9861.615 }, { - "id": "42", + "id": "41", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -3197,6 +3179,25 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product ], "rowCount": 9861.615 }, + { + "id": "42", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "39", + "41" + ], + "rowCount": 9861.615 + }, { "id": "43", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -3221,7 +3222,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "algorithm": "none", "cost": "not available", "inputs": [ - "40", + "34", "42" ], "rowCount": 8.75263053674025E7 @@ -3979,25 +3980,6 @@ Warning: Map Join MAPJOIN[375][bigTable=?] 
in task 'Map 5' is a cross product }, { "id": "63", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "literal": true, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "57", - "62" - ], - "rowCount": 59169.69 - }, - { - "id": "64", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -4047,7 +4029,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product "rowCount": 9861.615 }, { - "id": "65", + "id": "64", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_week_seq" @@ -4060,6 +4042,25 @@ Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product ], "rowCount": 9861.615 }, + { + "id": "65", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "literal": true, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "62", + "64" + ], + "rowCount": 9861.615 + }, { "id": "66", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", @@ -4084,7 +4085,7 @@ Warning: Map Join MAPJOIN[375][bigTable=?] 
in task 'Map 5' is a cross product "algorithm": "none", "cost": "not available", "inputs": [ - "63", + "57", "65" ], "rowCount": 8.75263053674025E7 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query59.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query59.q.out index 1b4328178247..0edbc184ccba 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query59.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query59.q.out @@ -238,13 +238,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -619,13 +619,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -1422,7 +1422,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2396,7 +2396,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query60.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query60.q.out index d49ab925bab4..e8e98fb578e7 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query60.q.out @@ -635,7 +635,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1375,13 +1375,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -1717,13 +1717,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, 
"maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query61.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query61.q.out index b9b3de739c01..68ca850436a8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query61.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query61.q.out @@ -1215,7 +1215,7 @@ Warning: Map Join MAPJOIN[249][bigTable=?] in task 'Reducer 6' is a cross produc }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1632,13 +1632,13 @@ Warning: Map Join MAPJOIN[249][bigTable=?] in task 'Reducer 6' is a cross produc }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -2034,13 +2034,13 @@ Warning: Map Join MAPJOIN[249][bigTable=?] in task 'Reducer 6' is a cross produc }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query62.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query62.q.out index ed070f5bfee4..45c5ad5fb860 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query62.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query62.q.out @@ -1258,7 +1258,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2136,13 +2136,13 @@ }, { "name": "web_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10089, "maxValue": 11550 }, { "name": "web_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10819, "maxValue": 11549 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query63.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query63.q.out index 
dc1732b94816..969b6c059ac7 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query63.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query63.q.out @@ -611,13 +611,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -1478,7 +1478,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query64.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query64.q.out index af4bc172d3bc..370dcc8fd0e5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query64.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query64.q.out @@ -1616,13 +1616,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -2084,7 +2084,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -3731,13 +3731,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -4509,7 +4509,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -4910,7 +4910,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query65.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query65.q.out index 27059885129c..b4f4eda7f0e3 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query65.q.out @@ -238,13 +238,13 @@ }, { "name": 
"s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -1009,7 +1009,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1977,13 +1977,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query66.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query66.q.out index c37a012e9090..f0ff6d58f024 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query66.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query66.q.out @@ -1097,7 +1097,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query67.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query67.q.out index ceeb913daeb0..f0cbd12e2bbc 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query67.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query67.q.out @@ -743,7 +743,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1185,13 +1185,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -1569,13 +1569,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query68.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query68.q.out index 
be608888ed6e..73d53918eddd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query68.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query68.q.out @@ -1439,7 +1439,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1925,13 +1925,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query69.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query69.q.out index 3b413dc4a442..bb551f9e1e30 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query69.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query69.q.out @@ -1215,7 +1215,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query70.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query70.q.out index 96f32cd87421..849b5e140982 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query70.q.out @@ -624,7 +624,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1082,13 +1082,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, @@ -1563,7 +1563,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2008,13 +2008,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query71.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query71.q.out index 71135cb5ad00..63bc9cf602cd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query71.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query71.q.out @@ -762,7 +762,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2435,13 +2435,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query72.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query72.q.out index 6085a1a1c228..91b7bb99f23b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query72.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query72.q.out @@ -1361,7 +1361,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1741,7 +1741,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2655,7 +2655,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -3899,13 +3899,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query73.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query73.q.out index ef77eb5759f7..5175001d564f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query73.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query73.q.out @@ -946,7 +946,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, 
"maxValue": 47482 }, @@ -1743,13 +1743,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query74.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query74.q.out index aa8ff619b07e..10069bae810b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query74.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query74.q.out @@ -624,7 +624,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query75.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query75.q.out index fdb234de5b4b..33cf9fafff8b 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query75.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query75.q.out @@ -1142,7 +1142,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1560,13 +1560,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query76.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query76.q.out index c7744c4fcba9..b4298028a920 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query76.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query76.q.out @@ -230,7 +230,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1003,13 +1003,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 
10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query77.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query77.q.out index 3eaad4c86418..b12c95eb69df 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query77.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query77.q.out @@ -620,7 +620,7 @@ Warning: Map Join MAPJOIN[213][bigTable=?] in task 'Reducer 9' is a cross produc }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query78.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query78.q.out index c98085c3a560..dd02301e6752 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query78.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query78.q.out @@ -1052,7 +1052,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query79.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query79.q.out index 8f26c5730591..b023b469f9a9 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query79.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query79.q.out @@ -951,7 +951,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1429,13 +1429,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query80.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query80.q.out index 567b50117fe2..93bf3a645ee0 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query80.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query80.q.out @@ -820,139 +820,164 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "item" + "date_dim" ], - "table:alias": "item", + "table:alias": "date_dim", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "i_rec_start_date" + "name": "d_date" }, { - "type": "DATE", + "type": "INTEGER", "nullable": true, - "name": "i_rec_end_date" + "name": "d_month_seq" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 200, - "name": "i_item_desc" + "name": "d_week_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" + "name": "d_quarter_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" + "name": "d_year" }, { "type": "INTEGER", "nullable": true, - "name": "i_brand_id" + "name": "d_dow" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_brand" + "name": "d_moy" }, { "type": "INTEGER", "nullable": true, - "name": "i_class_id" + "name": "d_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_class" + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "d_fy_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_category" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "i_manufact_id" + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - 
"precision": 50, - "name": "i_manufact" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_size" + "precision": 6, + "name": "d_quarter_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_color" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "i_container" + "name": "d_first_dom" }, { "type": "INTEGER", "nullable": true, - "name": "i_manager_id" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_product_name" + "precision": 1, + "name": "d_current_day" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_week" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_month" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_quarter" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -996,112 +1021,150 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 }, { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 }, { - "name": "i_item_id", - "ndv": 247524 + "name": "d_date_id", + "ndv": 71022 }, { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - 
"maxValue": 11622 + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 }, { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 + "name": "d_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_item_desc", - "ndv": 341846 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "i_brand", - "ndv": 742 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + "maxValue": 12 }, { - "name": "i_class_id", - "ndv": 16, + "name": "d_dom", + "ndv": 31, "minValue": 1, - "maxValue": 16 + "maxValue": 31 }, { - "name": "i_class", - "ndv": 99 + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "i_category_id", - "ndv": 10, - "minValue": 1, - "maxValue": 10 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_category", - "ndv": 11 + "name": "d_fy_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_manufact_id", - "ndv": 987, + "name": "d_fy_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 1000 + "maxValue": 10436 }, { - "name": "i_manufact", - "ndv": 1004 + "name": "d_day_name", + "ndv": 7 }, { - "name": "i_size", - "ndv": 8 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "d_holiday", + "ndv": 2 }, { - "name": "i_color", - "ndv": 95 + "name": "d_weekend", + "ndv": 2 }, { - "name": "i_units", - "ndv": 21 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "i_container", + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 + }, + { + "name": "d_last_dom", + "ndv": 2401, + 
"minValue": 2415020, + "maxValue": 2488372 + }, + { + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 + }, + { + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 + }, + { + "name": "d_current_day", + "ndv": 1 + }, + { + "name": "d_current_week", + "ndv": 1 + }, + { + "name": "d_current_month", "ndv": 2 }, { - "name": "i_manager_id", - "ndv": 104, - "minValue": 1, - "maxValue": 100 + "name": "d_current_quarter", + "ndv": 2 }, { - "name": "i_product_name", - "ndv": 461487 + "name": "d_current_year", + "ndv": 2 } ] }, @@ -1110,33 +1173,61 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": ">", - "kind": "GREATER_THAN", - "syntax": "BINARY" + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "input": 5, - "name": "$5" + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "literal": 50, + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "DECIMAL", + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 902188800000, + "type": { + "type": "TIMESTAMP", "nullable": false, - "precision": 2, - "scale": 0 + "precision": 9 + } + }, + { + "literal": 904780800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 } } ] }, - "rowCount": 231000 + "rowCount": 18262.25 }, { "id": "8", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk" + "d_date_sk" ], "exprs": [ { @@ -1144,7 +1235,7 @@ "name": "$0" } ], - "rowCount": 231000 + "rowCount": 18262.25 }, { "id": "9", @@ -1157,8 +1248,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 6, + "name": "$6" }, { "input": 11, @@ -1173,130 +1264,146 @@ "5", "8" ], - "rowCount": 4.048973649522101E23 + "rowCount": 
3.2010116463629865E22 }, { "id": "10", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "promotion" + "item" ], - "table:alias": "promotion", + "table:alias": "item", "inputs": [], - "rowCount": 2300, - "avgRowSize": 517, + "rowCount": 462000, + "avgRowSize": 1033, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "p_promo_sk" + "name": "i_item_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "p_promo_id" + "name": "i_item_id" }, { - "type": "BIGINT", + "type": "DATE", "nullable": true, - "name": "p_start_date_sk" + "name": "i_rec_start_date" }, { - "type": "BIGINT", + "type": "DATE", "nullable": true, - "name": "p_end_date_sk" + "name": "i_rec_end_date" }, { - "type": "BIGINT", + "type": "VARCHAR", "nullable": true, - "name": "p_item_sk" + "precision": 200, + "name": "i_item_desc" }, { "type": "DECIMAL", "nullable": true, - "precision": 15, + "precision": 7, "scale": 2, - "name": "p_cost" + "name": "i_current_price" + }, + { + "type": "DECIMAL", + "nullable": true, + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" }, { "type": "INTEGER", "nullable": true, - "name": "p_response_target" + "name": "i_brand_id" }, { "type": "CHAR", "nullable": true, "precision": 50, - "name": "p_promo_name" + "name": "i_brand" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "p_channel_dmail" + "name": "i_class_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "p_channel_email" + "precision": 50, + "name": "i_class" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "p_channel_catalog" + "precision": 50, + "name": "i_category" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "i_manufact_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "p_channel_tv" + "precision": 50, + "name": 
"i_manufact" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "p_channel_radio" + "precision": 20, + "name": "i_size" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "p_channel_press" + "precision": 20, + "name": "i_formulation" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "p_channel_event" + "precision": 20, + "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "p_channel_demo" + "precision": 10, + "name": "i_units" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 100, - "name": "p_channel_details" + "precision": 10, + "name": "i_container" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 15, - "name": "p_purpose" + "name": "i_manager_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "p_discount_active" + "precision": 50, + "name": "i_product_name" }, { "type": "BIGINT", @@ -1340,92 +1447,112 @@ }, "colStats": [ { - "name": "p_promo_sk", - "ndv": 2365, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 2300 + "maxValue": 462000 }, { - "name": "p_channel_tv", - "ndv": 2 + "name": "i_current_price", + "ndv": 9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "p_promo_id", - "ndv": 2307 + "name": "i_item_id", + "ndv": 247524 }, { - "name": "p_start_date_sk", - "ndv": 761, - "minValue": 2450096, - "maxValue": 2450915 + "name": "i_rec_start_date", + "ndv": 4, + "minValue": 10161, + "maxValue": 11622 }, { - "name": "p_end_date_sk", - "ndv": 736, - "minValue": 2450102, - "maxValue": 2450970 + "name": "i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "p_item_sk", - "ndv": 2252, - "minValue": 614, - "maxValue": 461932 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "p_cost", - "ndv": 1, - "minValue": 1000, - "maxValue": 1000 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { - "name": 
"p_response_target", - "ndv": 1, + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 + }, + { + "name": "i_brand", + "ndv": 742 + }, + { + "name": "i_class_id", + "ndv": 16, "minValue": 1, - "maxValue": 1 + "maxValue": 16 }, { - "name": "p_promo_name", - "ndv": 11 + "name": "i_class", + "ndv": 99 }, { - "name": "p_channel_dmail", - "ndv": 3 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "p_channel_email", - "ndv": 2 + "name": "i_category", + "ndv": 11 }, { - "name": "p_channel_catalog", - "ndv": 2 + "name": "i_manufact_id", + "ndv": 987, + "minValue": 1, + "maxValue": 1000 }, { - "name": "p_channel_radio", - "ndv": 2 + "name": "i_manufact", + "ndv": 1004 }, { - "name": "p_channel_press", - "ndv": 2 + "name": "i_size", + "ndv": 8 }, { - "name": "p_channel_event", - "ndv": 2 + "name": "i_formulation", + "ndv": 344236 }, { - "name": "p_channel_demo", - "ndv": 2 + "name": "i_color", + "ndv": 95 }, { - "name": "p_channel_details", - "ndv": 2242 + "name": "i_units", + "ndv": 21 }, { - "name": "p_purpose", + "name": "i_container", "ndv": 2 }, { - "name": "p_discount_active", - "ndv": 2 + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, + "maxValue": 100 + }, + { + "name": "i_product_name", + "ndv": 461487 } ] }, @@ -1434,32 +1561,33 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "=", - "kind": "EQUALS", + "name": ">", + "kind": "GREATER_THAN", "syntax": "BINARY" }, "operands": [ { - "input": 11, - "name": "$11" + "input": 5, + "name": "$5" }, { - "literal": "N", + "literal": 50, "type": { - "type": "CHAR", + "type": "DECIMAL", "nullable": false, - "precision": 1 + "precision": 2, + "scale": 0 } } ] }, - "rowCount": 345 + "rowCount": 231000 }, { "id": "12", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "p_promo_sk" + "i_item_sk" ], "exprs": [ { @@ -1467,7 +1595,7 @@ "name": "$0" 
} ], - "rowCount": 345 + "rowCount": 231000 }, { "id": "13", @@ -1480,8 +1608,8 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 0, + "name": "$0" }, { "input": 12, @@ -1496,171 +1624,130 @@ "9", "12" ], - "rowCount": 2.095343863627687E25 + "rowCount": 1.1091505354647748E27 }, { "id": "14", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "promotion" ], - "table:alias": "date_dim", + "table:alias": "promotion", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 2300, + "avgRowSize": 517, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "p_promo_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" - }, - { - "type": "DATE", - "nullable": true, - "name": "d_date" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_month_seq" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_week_seq" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_quarter_seq" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_year" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_dow" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_moy" + "name": "p_promo_id" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_dom" + "name": "p_start_date_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_qoy" + "name": "p_end_date_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_fy_year" + "name": "p_item_sk" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 15, + "scale": 2, + "name": "p_cost" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 9, - "name": "d_day_name" - }, - { - "type": "CHAR", - "nullable": true, - 
"precision": 6, - "name": "d_quarter_name" + "name": "p_response_target" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "p_promo_name" }, { "type": "CHAR", "nullable": true, "precision": 1, - "name": "d_weekend" + "name": "p_channel_dmail" }, { "type": "CHAR", "nullable": true, "precision": 1, - "name": "d_following_holiday" + "name": "p_channel_email" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_first_dom" + "precision": 1, + "name": "p_channel_catalog" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_last_dom" + "precision": 1, + "name": "p_channel_tv" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_same_day_ly" + "precision": 1, + "name": "p_channel_radio" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_same_day_lq" + "precision": 1, + "name": "p_channel_press" }, { "type": "CHAR", "nullable": true, "precision": 1, - "name": "d_current_day" + "name": "p_channel_event" }, { "type": "CHAR", "nullable": true, "precision": 1, - "name": "d_current_week" + "name": "p_channel_demo" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 100, + "name": "p_channel_details" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "precision": 15, + "name": "p_purpose" }, { "type": "CHAR", "nullable": true, "precision": 1, - "name": "d_current_year" + "name": "p_discount_active" }, { "type": "BIGINT", @@ -1704,149 +1791,91 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 71022 - }, - { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 - }, - { - "name": "d_week_seq", - "ndv": 11297, - "minValue": 1, - 
"maxValue": 10436 - }, - { - "name": "d_quarter_seq", - "ndv": 808, + "name": "p_promo_sk", + "ndv": 2365, "minValue": 1, - "maxValue": 801 - }, - { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "maxValue": 2300 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "p_channel_tv", + "ndv": 2 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "p_promo_id", + "ndv": 2307 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "p_start_date_sk", + "ndv": 761, + "minValue": 2450096, + "maxValue": 2450915 }, { - "name": "d_qoy", - "ndv": 4, - "minValue": 1, - "maxValue": 4 + "name": "p_end_date_sk", + "ndv": 736, + "minValue": 2450102, + "maxValue": 2450970 }, { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "p_item_sk", + "ndv": 2252, + "minValue": 614, + "maxValue": 461932 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "p_cost", + "ndv": 1, + "minValue": 1000, + "maxValue": 1000 }, { - "name": "d_fy_week_seq", - "ndv": 11297, + "name": "p_response_target", + "ndv": 1, "minValue": 1, - "maxValue": 10436 + "maxValue": 1 }, { - "name": "d_day_name", - "ndv": 7 + "name": "p_promo_name", + "ndv": 11 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "p_channel_dmail", + "ndv": 3 }, { - "name": "d_holiday", + "name": "p_channel_email", "ndv": 2 }, { - "name": "d_weekend", + "name": "p_channel_catalog", "ndv": 2 }, { - "name": "d_following_holiday", + "name": "p_channel_radio", "ndv": 2 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 - }, - { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 - }, - { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 - }, - { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": 
"p_channel_press", + "ndv": 2 }, { - "name": "d_current_day", - "ndv": 1 + "name": "p_channel_event", + "ndv": 2 }, { - "name": "d_current_week", - "ndv": 1 + "name": "p_channel_demo", + "ndv": 2 }, { - "name": "d_current_month", - "ndv": 2 + "name": "p_channel_details", + "ndv": 2242 }, { - "name": "d_current_quarter", + "name": "p_purpose", "ndv": 2 }, { - "name": "d_current_year", + "name": "p_discount_active", "ndv": 2 } ] @@ -1856,61 +1885,32 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 902188800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } + "input": 11, + "name": "$11" }, { - "literal": 904780800000, + "literal": "N", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 1 } } ] }, - "rowCount": 18262.25 + "rowCount": 345 }, { "id": "16", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "p_promo_sk" ], "exprs": [ { @@ -1918,7 +1918,7 @@ "name": "$0" } ], - "rowCount": 18262.25 + "rowCount": 345 }, { "id": "17", @@ -1931,8 +1931,8 @@ }, "operands": [ { - "input": 6, - "name": "$6" + "input": 2, + "name": "$2" }, { "input": 13, @@ -1947,7 +1947,7 @@ "13", "16" ], - "rowCount": 5.739854021030209E28 + "rowCount": 5.73985402103021E28 }, { "id": "18", @@ -2182,13 +2182,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 
3, "minValue": 10663, "maxValue": 11393 }, @@ -3604,22 +3604,50 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": ">", - "kind": "GREATER_THAN", - "syntax": "BINARY" + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "input": 5, - "name": "$5" + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + { + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], + "type": { + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 902188800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } }, { - "literal": 50, + "literal": 904780800000, "type": { - "type": "DECIMAL", + "type": "TIMESTAMP", "nullable": false, - "precision": 2, - "scale": 0 + "precision": 9 } } ] @@ -3627,13 +3655,13 @@ "inputs": [ "6" ], - "rowCount": 231000 + "rowCount": 18262.25 }, { "id": "31", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk" + "d_date_sk" ], "exprs": [ { @@ -3641,7 +3669,7 @@ "name": "$0" } ], - "rowCount": 231000 + "rowCount": 18262.25 }, { "id": "32", @@ -3654,8 +3682,8 @@ }, "operands": [ { - "input": 1, - "name": "$1" + "input": 6, + "name": "$6" }, { "input": 11, @@ -3670,28 +3698,29 @@ "29", "31" ], - "rowCount": 1.0561256948554074E23 + "rowCount": 8.349450853191845E21 }, { "id": "33", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "=", - "kind": "EQUALS", + "name": ">", + "kind": "GREATER_THAN", "syntax": "BINARY" }, "operands": [ { - "input": 11, - "name": "$11" + "input": 5, + "name": "$5" }, { - "literal": "N", + "literal": 50, "type": { - "type": "CHAR", + "type": "DECIMAL", "nullable": false, - "precision": 1 + "precision": 2, + "scale": 0 } } ] @@ -3699,13 +3728,13 @@ "inputs": [ "10" ], - 
"rowCount": 345 + "rowCount": 231000 }, { "id": "34", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "p_promo_sk" + "i_item_sk" ], "exprs": [ { @@ -3713,7 +3742,7 @@ "name": "$0" } ], - "rowCount": 345 + "rowCount": 231000 }, { "id": "35", @@ -3726,8 +3755,8 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 1, + "name": "$1" }, { "input": 12, @@ -3742,57 +3771,28 @@ "32", "34" ], - "rowCount": 5.465450470876733E24 + "rowCount": 2.893084720630974E26 }, { "id": "36", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 902188800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } + "input": 11, + "name": "$11" }, { - "literal": 904780800000, + "literal": "N", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 1 } } ] @@ -3800,13 +3800,13 @@ "inputs": [ "14" ], - "rowCount": 18262.25 + "rowCount": 345 }, { "id": "37", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "p_promo_sk" ], "exprs": [ { @@ -3814,7 +3814,7 @@ "name": "$0" } ], - "rowCount": 18262.25 + "rowCount": 345 }, { "id": "38", @@ -3827,8 +3827,8 @@ }, "operands": [ { - "input": 6, - "name": "$6" + "input": 2, + "name": "$2" }, { "input": 13, @@ -5257,22 +5257,50 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": ">", - "kind": "GREATER_THAN", - 
"syntax": "BINARY" + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "input": 5, - "name": "$5" + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "literal": 50, + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "DECIMAL", + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 902188800000, + "type": { + "type": "TIMESTAMP", "nullable": false, - "precision": 2, - "scale": 0 + "precision": 9 + } + }, + { + "literal": 904780800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 } } ] @@ -5280,13 +5308,13 @@ "inputs": [ "6" ], - "rowCount": 231000 + "rowCount": 18262.25 }, { "id": "52", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk" + "d_date_sk" ], "exprs": [ { @@ -5294,7 +5322,7 @@ "name": "$0" } ], - "rowCount": 231000 + "rowCount": 18262.25 }, { "id": "53", @@ -5307,8 +5335,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 6, + "name": "$6" }, { "input": 11, @@ -5323,28 +5351,29 @@ "50", "52" ], - "rowCount": 2.6510288818530387E22 + "rowCount": 2.0958334284684263E21 }, { "id": "54", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "=", - "kind": "EQUALS", + "name": ">", + "kind": "GREATER_THAN", "syntax": "BINARY" }, "operands": [ { - "input": 11, - "name": "$11" + "input": 5, + "name": "$5" }, { - "literal": "N", + "literal": 50, "type": { - "type": "CHAR", + "type": "DECIMAL", "nullable": false, - "precision": 1 + "precision": 2, + "scale": 0 } } ] @@ -5352,13 +5381,13 @@ "inputs": [ "10" ], - "rowCount": 345 + "rowCount": 231000 }, { "id": "55", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "p_promo_sk" + "i_item_sk" ], "exprs": [ { @@ -5366,7 +5395,7 @@ 
"name": "$0" } ], - "rowCount": 345 + "rowCount": 231000 }, { "id": "56", @@ -5379,8 +5408,8 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 0, + "name": "$0" }, { "input": 12, @@ -5395,57 +5424,28 @@ "53", "55" ], - "rowCount": 1.3719074463589475E24 + "rowCount": 7.262062829643098E25 }, { "id": "57", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 902188800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } + "input": 11, + "name": "$11" }, { - "literal": 904780800000, + "literal": "N", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 1 } } ] @@ -5453,13 +5453,13 @@ "inputs": [ "14" ], - "rowCount": 18262.25 + "rowCount": 345 }, { "id": "58", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "p_promo_sk" ], "exprs": [ { @@ -5467,7 +5467,7 @@ "name": "$0" } ], - "rowCount": 18262.25 + "rowCount": 345 }, { "id": "59", @@ -5480,8 +5480,8 @@ }, "operands": [ { - "input": 6, - "name": "$6" + "input": 2, + "name": "$2" }, { "input": 13, @@ -5496,7 +5496,7 @@ "56", "58" ], - "rowCount": 3.7581175143403033E27 + "rowCount": 3.758117514340303E27 }, { "id": "60", @@ -5715,13 +5715,13 @@ }, { "name": "web_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10089, "maxValue": 11550 }, { "name": "web_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10819, "maxValue": 11549 }, @@ -5873,7 +5873,7 @@ "59", "61" ], - 
"rowCount": 4.735228068068782E28 + "rowCount": 4.735228068068781E28 }, { "id": "63", @@ -5977,7 +5977,7 @@ ] } ], - "rowCount": 4.735228068068782E28 + "rowCount": 4.735228068068781E28 }, { "id": "64", @@ -6041,7 +6041,7 @@ "name": null } ], - "rowCount": 4.735228068068782E27 + "rowCount": 4.735228068068781E27 }, { "id": "65", @@ -6096,7 +6096,7 @@ "name": "$3" } ], - "rowCount": 4.735228068068782E27 + "rowCount": 4.735228068068781E27 }, { "id": "66", diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query81.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query81.q.out index 6c5cefcf3f0a..42396159880e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query81.q.out @@ -1519,7 +1519,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query82.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query82.q.out index 644be5361e56..67aff75e66f1 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query82.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query82.q.out @@ -356,164 +356,297 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "inventory" ], - "table:alias": "date_dim", + "table:alias": "inventory", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 1627857000, + "avgRowSize": 157, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "inv_date_sk" }, { - "type": "VARCHAR", + "type": "BIGINT", "nullable": false, - "precision": 2147483647, - "name": "d_date_id" + "name": "inv_item_sk" }, { - "type": "DATE", - "nullable": true, - "name": "d_date" + "type": "BIGINT", + "nullable": false, + "name": "inv_warehouse_sk" }, { "type": "INTEGER", "nullable": true, - 
"name": "d_month_seq" + "name": "inv_quantity_on_hand" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_week_seq" + "name": "BLOCK__OFFSET__INSIDE__FILE" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_quarter_seq" + "precision": 2147483647, + "name": "INPUT__FILE__NAME" }, { - "type": "INTEGER", + "fields": [ + { + "type": "BIGINT", + "nullable": true, + "name": "writeid" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "bucketid" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "rowid" + } + ], "nullable": true, - "name": "d_year" + "name": "ROW__ID" }, { - "type": "INTEGER", + "type": "BOOLEAN", "nullable": true, - "name": "d_dow" + "name": "ROW__IS__DELETED" + } + ], + "nullable": false + }, + "colStats": [ + { + "name": "inv_date_sk", + "ndv": 258, + "minValue": 2450815, + "maxValue": 2452635 + }, + { + "name": "inv_item_sk", + "ndv": 464811, + "minValue": 1, + "maxValue": 462000 + }, + { + "name": "inv_quantity_on_hand", + "ndv": 987, + "minValue": 0, + "maxValue": 1000 + }, + { + "name": "inv_warehouse_sk", + "ndv": 27, + "minValue": 1, + "maxValue": 27 + } + ] + }, + { + "id": "3", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "type": "INTEGER", + "input": 3, + "name": "$3" + }, + { + "literal": 100, + "type": { + "type": "INTEGER", + "nullable": false + } + }, + { + "literal": 500, + "type": { + "type": "INTEGER", + "nullable": false + } + } + ] + }, + "rowCount": 406964250 + }, + { + "id": "4", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", + "fields": [ + "inv_date_sk", + "inv_item_sk" + ], + "exprs": [ + { + "input": 0, + "name": "$0" + }, + { + "input": 1, + "name": "$1" + } + ], + "rowCount": 406964250 + }, 
+ { + "id": "5", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", + "table": [ + "default", + "item" + ], + "table:alias": "item", + "inputs": [], + "rowCount": 462000, + "avgRowSize": 1033, + "rowType": { + "fields": [ + { + "type": "BIGINT", + "nullable": false, + "name": "i_item_sk" + }, + { + "type": "VARCHAR", + "nullable": false, + "precision": 2147483647, + "name": "i_item_id" + }, + { + "type": "DATE", "nullable": true, - "name": "d_moy" + "name": "i_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_dom" + "name": "i_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_qoy" + "precision": 200, + "name": "i_item_desc" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_fy_year" + "precision": 7, + "scale": 2, + "name": "i_current_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "i_brand_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "i_brand" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "i_class_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "i_class" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_weekend" + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" + "precision": 50, + "name": "i_category" }, { "type": "INTEGER", "nullable": true, - "name": "d_first_dom" + "name": "i_manufact_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_last_dom" + "precision": 50, + "name": "i_manufact" }, { - 
"type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_same_day_ly" + "precision": 20, + "name": "i_size" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_same_day_lq" + "precision": 20, + "name": "i_formulation" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 20, + "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "i_units" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 10, + "name": "i_container" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "name": "i_manager_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 50, + "name": "i_product_name" }, { "type": "BIGINT", @@ -557,367 +690,216 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 71022 - }, - { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 - }, - { - "name": "d_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 - }, - { - "name": "d_quarter_seq", - "ndv": 808, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 801 + "maxValue": 462000 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "i_item_id", + "ndv": 247524 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "i_current_price", + "ndv": 9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "d_dom", - "ndv": 31, + "name": "i_manufact_id", + "ndv": 987, "minValue": 1, - "maxValue": 31 + "maxValue": 1000 }, 
{ - "name": "d_qoy", + "name": "i_rec_start_date", "ndv": 4, - "minValue": 1, - "maxValue": 4 - }, - { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "minValue": 10161, + "maxValue": 11622 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "d_fy_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { - "name": "d_day_name", - "ndv": 7 + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "i_brand", + "ndv": 742 }, { - "name": "d_holiday", - "ndv": 2 + "name": "i_class_id", + "ndv": 16, + "minValue": 1, + "maxValue": 16 }, { - "name": "d_weekend", - "ndv": 2 + "name": "i_class", + "ndv": 99 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "i_category", + "ndv": 11 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "i_manufact", + "ndv": 1004 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "i_size", + "ndv": 8 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "i_formulation", + "ndv": 344236 }, { - "name": "d_current_day", - "ndv": 1 + "name": "i_color", + "ndv": 95 }, { - "name": "d_current_week", - "ndv": 1 + "name": "i_units", + "ndv": 21 }, { - "name": "d_current_month", + "name": "i_container", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, + "maxValue": 100 }, { - "name": "d_current_year", - "ndv": 2 + "name": "i_product_name", + 
"ndv": 461487 } ] }, { - "id": "3", + "id": "6", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "AND", + "kind": "AND", + "syntax": "BINARY" }, "operands": [ - { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, { "op": { - "name": "CAST", - "kind": "CAST", + "name": "IN", + "kind": "OTHER_FUNCTION", "syntax": "SPECIAL" }, "operands": [ { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 1022716800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - }, - { - "literal": 1027900800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } - } - ] - }, - "rowCount": 18262.25 - }, - { - "id": "4", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", - "fields": [ - "d_date_sk" - ], - "exprs": [ - { - "input": 0, - "name": "$0" - } - ], - "rowCount": 18262.25 - }, - { - "id": "5", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", - "table": [ - "default", - "inventory" - ], - "table:alias": "inventory", - "inputs": [], - "rowCount": 1627857000, - "avgRowSize": 157, - "rowType": { - "fields": [ - { - "type": "BIGINT", - "nullable": false, - "name": "inv_date_sk" - }, - { - "type": "BIGINT", - "nullable": false, - "name": "inv_item_sk" - }, - { - "type": "BIGINT", - "nullable": false, - "name": "inv_warehouse_sk" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "inv_quantity_on_hand" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "BLOCK__OFFSET__INSIDE__FILE" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 2147483647, - "name": "INPUT__FILE__NAME" - }, - { - "fields": [ + "input": 13, + "name": "$13" + }, { - "type": "BIGINT", - "nullable": true, - "name": "writeid" + 
"literal": 129, + "type": { + "type": "INTEGER", + "nullable": false + } }, { - "type": "INTEGER", - "nullable": true, - "name": "bucketid" + "literal": 437, + "type": { + "type": "INTEGER", + "nullable": false + } }, { - "type": "BIGINT", - "nullable": true, - "name": "rowid" + "literal": 663, + "type": { + "type": "INTEGER", + "nullable": false + } + }, + { + "literal": 727, + "type": { + "type": "INTEGER", + "nullable": false + } } - ], - "nullable": true, - "name": "ROW__ID" - }, - { - "type": "BOOLEAN", - "nullable": true, - "name": "ROW__IS__DELETED" - } - ], - "nullable": false - }, - "colStats": [ - { - "name": "inv_date_sk", - "ndv": 258, - "minValue": 2450815, - "maxValue": 2452635 - }, - { - "name": "inv_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 - }, - { - "name": "inv_quantity_on_hand", - "ndv": 987, - "minValue": 0, - "maxValue": 1000 - }, - { - "name": "inv_warehouse_sk", - "ndv": 27, - "minValue": 1, - "maxValue": 27 - } - ] - }, - { - "id": "6", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" - }, - "operands": [ - { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "input": 3, - "name": "$3" - }, - { - "literal": 100, - "type": { - "type": "INTEGER", - "nullable": false - } + ] }, { - "literal": 500, - "type": { - "type": "INTEGER", - "nullable": false - } + "op": { + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } + }, + { + "input": 5, + "name": "$5" + }, + { + "literal": 30, + "type": { + "type": "DECIMAL", + "nullable": false, + "precision": 12, + "scale": 2 + } + }, + { + "literal": 60, + "type": { + "type": "DECIMAL", + "nullable": false, + "precision": 12, + "scale": 2 + } + } + ] } ] }, - "rowCount": 406964250 + "rowCount": 28875 }, { "id": 
"7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "inv_date_sk", - "inv_item_sk" + "i_item_sk", + "i_item_id", + "i_item_desc", + "i_current_price" ], "exprs": [ { @@ -927,148 +909,210 @@ { "input": 1, "name": "$1" + }, + { + "input": 4, + "name": "$4" + }, + { + "input": 5, + "name": "$5" } ], - "rowCount": 406964250 + "rowCount": 28875 }, { "id": "8", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 1, + "name": "$1" + }, + { + "input": 2, + "name": "$2" + } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "4", + "7" + ], + "rowCount": 1.7626639078125E12 + }, + { + "id": "9", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "item" + "date_dim" ], - "table:alias": "item", + "table:alias": "date_dim", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "i_rec_start_date" + "name": "d_date" }, { - "type": "DATE", + "type": "INTEGER", "nullable": true, - "name": "i_rec_end_date" + "name": "d_month_seq" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 200, - "name": "i_item_desc" + "name": "d_week_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" + "name": "d_quarter_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" + "name": "d_year" }, { "type": "INTEGER", 
"nullable": true, - "name": "i_brand_id" + "name": "d_dow" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_brand" + "name": "d_moy" }, { "type": "INTEGER", "nullable": true, - "name": "i_class_id" + "name": "d_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_class" + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "d_fy_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_category" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "i_manufact_id" + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_manufact" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_size" + "precision": 6, + "name": "d_quarter_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_color" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "i_container" + "name": "d_first_dom" }, { "type": "INTEGER", "nullable": true, - "name": "i_manager_id" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_product_name" + "precision": 1, + "name": "d_current_day" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_week" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_month" + 
}, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_quarter" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -1112,239 +1156,224 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 + }, + { + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 + }, + { + "name": "d_date_id", + "ndv": 71022 + }, + { + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 + }, + { + "name": "d_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 462000 + "maxValue": 10436 }, { - "name": "i_item_id", - "ndv": 247524 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_item_desc", - "ndv": 341846 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "i_manufact_id", - "ndv": 987, + "name": "d_moy", + "ndv": 12, "minValue": 1, - "maxValue": 1000 + "maxValue": 12 }, { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - "maxValue": 11622 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 + }, + { + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "name": "d_fy_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "d_fy_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_brand", - "ndv": 742 + "name": 
"d_day_name", + "ndv": 7 }, { - "name": "i_class_id", - "ndv": 16, - "minValue": 1, - "maxValue": 16 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "i_class", - "ndv": 99 + "name": "d_holiday", + "ndv": 2 }, { - "name": "i_category_id", - "ndv": 10, - "minValue": 1, - "maxValue": 10 + "name": "d_weekend", + "ndv": 2 }, { - "name": "i_category", - "ndv": 11 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "i_manufact", - "ndv": 1004 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "i_size", - "ndv": 8 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 }, { - "name": "i_color", - "ndv": 95 + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 }, { - "name": "i_units", - "ndv": 21 + "name": "d_current_day", + "ndv": 1 }, { - "name": "i_container", + "name": "d_current_week", + "ndv": 1 + }, + { + "name": "d_current_month", "ndv": 2 }, { - "name": "i_manager_id", - "ndv": 104, - "minValue": 1, - "maxValue": 100 + "name": "d_current_quarter", + "ndv": 2 }, { - "name": "i_product_name", - "ndv": 461487 + "name": "d_current_year", + "ndv": 2 } ] }, { - "id": "9", + "id": "10", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "AND", - "kind": "AND", - "syntax": "BINARY" + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "op": { - "name": "IN", - "kind": "OTHER_FUNCTION", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 13, - "name": "$13" - }, - { - "literal": 129, - "type": { - "type": "INTEGER", - "nullable": false - } - }, - { - "literal": 437, - "type": { - "type": "INTEGER", - "nullable": false - } - }, - { - "literal": 663, - "type": { - "type": "INTEGER", - "nullable": false - } - }, - { - 
"literal": 727, - "type": { - "type": "INTEGER", - "nullable": false - } - } - ] + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", + "name": "CAST", + "kind": "CAST", "syntax": "SPECIAL" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "input": 5, - "name": "$5" - }, - { - "literal": 30, - "type": { - "type": "DECIMAL", - "nullable": false, - "precision": 12, - "scale": 2 - } - }, - { - "literal": 60, - "type": { - "type": "DECIMAL", - "nullable": false, - "precision": 12, - "scale": 2 - } + "input": 2, + "name": "$2" } - ] + ], + "type": { + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 1022716800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } + }, + { + "literal": 1027900800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } } ] }, - "rowCount": 28875 + "rowCount": 18262.25 }, { - "id": "10", + "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk", - "i_item_id", - "i_item_desc", - "i_current_price" + "d_date_sk" ], "exprs": [ { "input": 0, "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, - { - "input": 4, - "name": "$4" - }, - { - "input": 5, - "name": "$5" } ], - "rowCount": 28875 + "rowCount": 18262.25 }, { - "id": "11", + "id": "12", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", "condition": { "op": { @@ -1354,41 +1383,12 @@ }, "operands": [ { - "input": 1, - "name": "$1" + "input": 6, + "name": "$6" }, - { - "input": 2, - "name": "$2" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "7", - "10" - ], - "rowCount": 1.7626639078125E12 - }, - { - "id": "12", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - 
"name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ { "input": 0, "name": "$0" - }, - { - "input": 1, - "name": "$1" } ] }, @@ -1396,7 +1396,7 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "4", + "8", "11" ], "rowCount": 4828531342567324 @@ -1416,8 +1416,8 @@ "name": "$0" }, { - "input": 4, - "name": "$4" + "input": 3, + "name": "$3" } ] }, @@ -1434,9 +1434,9 @@ "id": "14", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ + 4, 5, - 6, - 7 + 6 ], "aggs": [], "rowCount": 6.258130892474543E24 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query83.q.out index 384cb8cc7c73..024c50446ad8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query83.q.out @@ -630,7 +630,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1053,7 +1053,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1586,13 +1586,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query85.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query85.q.out index 10401d5dee85..134436ff9762 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query85.q.out @@ -1016,7 +1016,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query86.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query86.q.out index 
506bc6b979ad..1f4d951b8f79 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query86.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query86.q.out @@ -729,7 +729,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1141,13 +1141,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query87.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query87.q.out index b4558e98b4f6..8723d2fdfa15 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query87.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query87.q.out @@ -879,7 +879,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query88.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query88.q.out index 343e9d70c74c..463ea7d44ff5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query88.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query88.q.out @@ -1286,13 +1286,13 @@ Warning: Map Join MAPJOIN[599][bigTable=?] 
in task 'Reducer 5' is a cross produc }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query89.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query89.q.out index 838c7400758a..0518783f7ee8 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query89.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query89.q.out @@ -610,13 +610,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10891, "maxValue": 11621 }, @@ -1294,7 +1294,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -1732,13 +1732,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query90.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query90.q.out index d28ae6f952be..e00f4bc56fdc 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query90.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query90.q.out @@ -688,13 +688,13 @@ Warning: Map Join MAPJOIN[149][bigTable=?] 
in task 'Reducer 2' is a cross produc }, { "name": "wp_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10107, "maxValue": 11568 }, { "name": "wp_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10837, "maxValue": 11567 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query91.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query91.q.out index 968ee6fa9538..beebfacedd73 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query91.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query91.q.out @@ -686,7 +686,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2361,13 +2361,13 @@ }, { "name": "cc_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10227, "maxValue": 11688 }, { "name": "cc_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10957, "maxValue": 11687 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query92.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query92.q.out index 4c0f08eab36d..d50352f4c452 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query92.q.out @@ -6,164 +6,213 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "web_sales" ], - "table:alias": "date_dim", + "table:alias": "web_sales", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 21594638446, + "avgRowSize": 337, "rowType": { "fields": [ { "type": "BIGINT", - "nullable": false, - "name": "d_date_sk" + "nullable": true, + "name": "ws_sold_time_sk" }, { - "type": "VARCHAR", + "type": "BIGINT", + "nullable": true, + "name": "ws_ship_date_sk" + }, + { + "type": "BIGINT", "nullable": false, - "precision": 2147483647, - "name": "d_date_id" + "name": "ws_item_sk" }, { - "type": "DATE", + "type": "BIGINT", "nullable": true, - "name": "d_date" + "name": 
"ws_bill_customer_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_month_seq" + "name": "ws_bill_cdemo_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_week_seq" + "name": "ws_bill_hdemo_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_quarter_seq" + "name": "ws_bill_addr_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_year" + "name": "ws_ship_customer_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_dow" + "name": "ws_ship_cdemo_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_moy" + "name": "ws_ship_hdemo_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_dom" + "name": "ws_ship_addr_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_qoy" + "name": "ws_web_page_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_fy_year" + "name": "ws_web_site_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_fy_quarter_seq" + "name": "ws_ship_mode_sk" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "ws_warehouse_sk" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "ws_promo_sk" + }, + { + "type": "BIGINT", + "nullable": false, + "name": "ws_order_number" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "ws_quantity" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 7, + "scale": 2, + "name": "ws_wholesale_cost" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "precision": 7, + "scale": 2, + "name": "ws_list_price" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 7, + "scale": 2, + "name": "ws_sales_price" }, { - "type": "CHAR", + 
"type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 7, + "scale": 2, + "name": "ws_ext_discount_amt" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_following_holiday" + "precision": 7, + "scale": 2, + "name": "ws_ext_sales_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_first_dom" + "precision": 7, + "scale": 2, + "name": "ws_ext_wholesale_cost" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_last_dom" + "precision": 7, + "scale": 2, + "name": "ws_ext_list_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_same_day_ly" + "precision": 7, + "scale": 2, + "name": "ws_ext_tax" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_same_day_lq" + "precision": 7, + "scale": 2, + "name": "ws_coupon_amt" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 7, + "scale": 2, + "name": "ws_ext_ship_cost" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 7, + "scale": 2, + "name": "ws_net_paid" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 7, + "scale": 2, + "name": "ws_net_paid_inc_tax" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "precision": 7, + "scale": 2, + "name": "ws_net_paid_inc_ship" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 7, + "scale": 2, + "name": "ws_net_paid_inc_ship_tax" + }, + { + "type": "DECIMAL", + "nullable": true, + "precision": 7, + "scale": 2, + "name": "ws_net_profit" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "ws_sold_date_sk" }, { "type": "BIGINT", @@ -205,152 +254,213 
@@ ], "nullable": false }, + "partitionColumns": [ + "ws_sold_date_sk" + ], "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 + "name": "ws_item_sk", + "ndv": 462000, + "minValue": 1, + "maxValue": 462000 }, { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 + "name": "ws_ext_discount_amt", + "ndv": 1141615, + "minValue": 0, + "maxValue": 29982 }, { - "name": "d_date_id", - "ndv": 71022 + "name": "ws_sold_date_sk", + "ndv": 1823, + "minValue": 2450816, + "maxValue": 2452642 }, { - "name": "d_month_seq", - "ndv": 2439, + "name": "ws_sold_time_sk", + "ndv": 85503, "minValue": 0, - "maxValue": 2400 + "maxValue": 86399 }, { - "name": "d_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "ws_ship_date_sk", + "ndv": 1902, + "minValue": 2450817, + "maxValue": 2452762 }, { - "name": "d_quarter_seq", - "ndv": 808, + "name": "ws_bill_customer_sk", + "ndv": 78525965, "minValue": 1, - "maxValue": 801 + "maxValue": 80000000 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "ws_bill_cdemo_sk", + "ndv": 1920801, + "minValue": 1, + "maxValue": 1920800 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "ws_bill_hdemo_sk", + "ndv": 7201, + "minValue": 1, + "maxValue": 7200 }, { - "name": "d_moy", - "ndv": 12, + "name": "ws_bill_addr_sk", + "ndv": 40000001, "minValue": 1, - "maxValue": 12 + "maxValue": 40000000 }, { - "name": "d_dom", - "ndv": 31, + "name": "ws_ship_customer_sk", + "ndv": 78525965, "minValue": 1, - "maxValue": 31 + "maxValue": 80000000 }, { - "name": "d_qoy", - "ndv": 4, + "name": "ws_ship_cdemo_sk", + "ndv": 1920801, "minValue": 1, - "maxValue": 4 + "maxValue": 1920800 }, { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "ws_ship_hdemo_sk", + "ndv": 7201, + "minValue": 1, + "maxValue": 7200 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, + "name": "ws_ship_addr_sk", 
+ "ndv": 40000001, "minValue": 1, - "maxValue": 801 + "maxValue": 40000000 }, { - "name": "d_fy_week_seq", - "ndv": 11297, + "name": "ws_web_page_sk", + "ndv": 4556, "minValue": 1, - "maxValue": 10436 + "maxValue": 4602 }, { - "name": "d_day_name", - "ndv": 7 + "name": "ws_web_site_sk", + "ndv": 85, + "minValue": 1, + "maxValue": 84 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "ws_ship_mode_sk", + "ndv": 20, + "minValue": 1, + "maxValue": 20 }, { - "name": "d_holiday", - "ndv": 2 + "name": "ws_warehouse_sk", + "ndv": 27, + "minValue": 1, + "maxValue": 27 }, { - "name": "d_weekend", - "ndv": 2 + "name": "ws_promo_sk", + "ndv": 2301, + "minValue": 1, + "maxValue": 2300 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "ws_order_number", + "ndv": 1800000000, + "minValue": 1, + "maxValue": 1800000000 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "ws_quantity", + "ndv": 101, + "minValue": 1, + "maxValue": 100 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "ws_wholesale_cost", + "ndv": 9382, + "minValue": 1, + "maxValue": 100 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "ws_list_price", + "ndv": 31080, + "minValue": 1, + "maxValue": 300 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "ws_sales_price", + "ndv": 31054, + "minValue": 0, + "maxValue": 300 }, { - "name": "d_current_day", - "ndv": 1 + "name": "ws_ext_sales_price", + "ndv": 1155428, + "minValue": 0, + "maxValue": 29970 }, { - "name": "d_current_week", - "ndv": 1 + "name": "ws_ext_wholesale_cost", + "ndv": 381232, + "minValue": 1, + "maxValue": 10000 }, { - "name": "d_current_month", - "ndv": 2 + "name": "ws_ext_list_price", + "ndv": 1208442, + "minValue": 1, + "maxValue": 30000 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "ws_ext_tax", + "ndv": 233719, + 
"minValue": 0, + "maxValue": 2682.9 }, { - "name": "d_current_year", - "ndv": 2 + "name": "ws_coupon_amt", + "ndv": 1659025, + "minValue": 0, + "maxValue": 28824 + }, + { + "name": "ws_ext_ship_cost", + "ndv": 564319, + "minValue": 0, + "maxValue": 14950 + }, + { + "name": "ws_net_paid", + "ndv": 1870760, + "minValue": 0, + "maxValue": 29970 + }, + { + "name": "ws_net_paid_inc_tax", + "ndv": 2521035, + "minValue": 0, + "maxValue": 32492.9 + }, + { + "name": "ws_net_paid_inc_ship", + "ndv": 2577850, + "minValue": 0, + "maxValue": 44263 + }, + { + "name": "ws_net_paid_inc_ship_tax", + "ndv": 3413793, + "minValue": 0, + "maxValue": 46389.84 + }, + { + "name": "ws_net_profit", + "ndv": 2074138, + "minValue": -10000, + "maxValue": 19980 } ] }, @@ -359,282 +469,228 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "AND", + "kind": "AND", + "syntax": "BINARY" }, "operands": [ - { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, { "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" + "name": "IS NOT NULL", + "kind": "IS_NOT_NULL", + "syntax": "POSTFIX" }, "operands": [ { - "input": 2, - "name": "$2" + "input": 21, + "name": "$21" } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 890179200000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } + ] }, { - "literal": 897955200000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } + "op": { + "name": "IS NOT NULL", + "kind": "IS_NOT_NULL", + "syntax": "POSTFIX" + }, + "operands": [ + { + "input": 33, + "name": "$33" + } + ] } ] }, - "rowCount": 18262.25 + "rowCount": 1.7491657141260002E10 }, { "id": "2", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "ws_item_sk", + 
"ws_ext_discount_amt", + "ws_sold_date_sk" ], "exprs": [ { - "input": 0, - "name": "$0" + "input": 2, + "name": "$2" + }, + { + "input": 21, + "name": "$21" + }, + { + "input": 33, + "name": "$33" } ], - "rowCount": 18262.25 + "rowCount": 1.7491657141260002E10 }, { "id": "3", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "web_sales" + "date_dim" ], - "table:alias": "web_sales", + "table:alias": "date_dim", "inputs": [], - "rowCount": 21594638446, - "avgRowSize": 337, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ - { - "type": "BIGINT", - "nullable": true, - "name": "ws_sold_time_sk" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "ws_ship_date_sk" - }, { "type": "BIGINT", "nullable": false, - "name": "ws_item_sk" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "ws_bill_customer_sk" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "ws_bill_cdemo_sk" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "ws_bill_hdemo_sk" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "ws_bill_addr_sk" + "name": "d_date_sk" }, { - "type": "BIGINT", - "nullable": true, - "name": "ws_ship_customer_sk" + "type": "VARCHAR", + "nullable": false, + "precision": 2147483647, + "name": "d_date_id" }, { - "type": "BIGINT", + "type": "DATE", "nullable": true, - "name": "ws_ship_cdemo_sk" + "name": "d_date" }, { - "type": "BIGINT", + "type": "INTEGER", "nullable": true, - "name": "ws_ship_hdemo_sk" + "name": "d_month_seq" }, { - "type": "BIGINT", + "type": "INTEGER", "nullable": true, - "name": "ws_ship_addr_sk" + "name": "d_week_seq" }, { - "type": "BIGINT", + "type": "INTEGER", "nullable": true, - "name": "ws_web_page_sk" + "name": "d_quarter_seq" }, { - "type": "BIGINT", + "type": "INTEGER", "nullable": true, - "name": "ws_web_site_sk" + "name": "d_year" }, { - "type": "BIGINT", + "type": "INTEGER", "nullable": true, - "name": "ws_ship_mode_sk" + "name": "d_dow" 
}, { - "type": "BIGINT", + "type": "INTEGER", "nullable": true, - "name": "ws_warehouse_sk" + "name": "d_moy" }, { - "type": "BIGINT", + "type": "INTEGER", "nullable": true, - "name": "ws_promo_sk" + "name": "d_dom" }, { - "type": "BIGINT", - "nullable": false, - "name": "ws_order_number" + "type": "INTEGER", + "nullable": true, + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "ws_quantity" + "name": "d_fy_year" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_wholesale_cost" + "name": "d_fy_quarter_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_list_price" + "name": "d_fy_week_seq" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_sales_price" + "precision": 9, + "name": "d_day_name" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_ext_discount_amt" + "precision": 6, + "name": "d_quarter_name" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_ext_sales_price" + "precision": 1, + "name": "d_holiday" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_ext_wholesale_cost" + "precision": 1, + "name": "d_weekend" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_ext_list_price" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_ext_tax" + "name": "d_first_dom" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_coupon_amt" + "name": "d_last_dom" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_ext_ship_cost" + 
"name": "d_same_day_ly" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_net_paid" + "name": "d_same_day_lq" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_net_paid_inc_tax" + "precision": 1, + "name": "d_current_day" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_net_paid_inc_ship" + "precision": 1, + "name": "d_current_week" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_net_paid_inc_ship_tax" + "precision": 1, + "name": "d_current_month" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 7, - "scale": 2, - "name": "ws_net_profit" + "precision": 1, + "name": "d_current_quarter" }, { - "type": "BIGINT", + "type": "CHAR", "nullable": true, - "name": "ws_sold_date_sk" + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -676,213 +732,152 @@ ], "nullable": false }, - "partitionColumns": [ - "ws_sold_date_sk" - ], "colStats": [ { - "name": "ws_item_sk", - "ndv": 462000, - "minValue": 1, - "maxValue": 462000 + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 }, { - "name": "ws_ext_discount_amt", - "ndv": 1141615, - "minValue": 0, - "maxValue": 29982 + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 }, { - "name": "ws_sold_date_sk", - "ndv": 1823, - "minValue": 2450816, - "maxValue": 2452642 + "name": "d_date_id", + "ndv": 71022 }, { - "name": "ws_sold_time_sk", - "ndv": 85503, + "name": "d_month_seq", + "ndv": 2439, "minValue": 0, - "maxValue": 86399 - }, - { - "name": "ws_ship_date_sk", - "ndv": 1902, - "minValue": 2450817, - "maxValue": 2452762 - }, - { - "name": "ws_bill_customer_sk", - "ndv": 78525965, - "minValue": 1, - "maxValue": 80000000 - }, - { - "name": "ws_bill_cdemo_sk", - "ndv": 1920801, - "minValue": 1, - 
"maxValue": 1920800 - }, - { - "name": "ws_bill_hdemo_sk", - "ndv": 7201, - "minValue": 1, - "maxValue": 7200 - }, - { - "name": "ws_bill_addr_sk", - "ndv": 40000001, - "minValue": 1, - "maxValue": 40000000 - }, - { - "name": "ws_ship_customer_sk", - "ndv": 78525965, - "minValue": 1, - "maxValue": 80000000 + "maxValue": 2400 }, { - "name": "ws_ship_cdemo_sk", - "ndv": 1920801, + "name": "d_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 1920800 + "maxValue": 10436 }, { - "name": "ws_ship_hdemo_sk", - "ndv": 7201, + "name": "d_quarter_seq", + "ndv": 808, "minValue": 1, - "maxValue": 7200 + "maxValue": 801 }, { - "name": "ws_ship_addr_sk", - "ndv": 40000001, - "minValue": 1, - "maxValue": 40000000 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "ws_web_page_sk", - "ndv": 4556, - "minValue": 1, - "maxValue": 4602 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "ws_web_site_sk", - "ndv": 85, + "name": "d_moy", + "ndv": 12, "minValue": 1, - "maxValue": 84 + "maxValue": 12 }, { - "name": "ws_ship_mode_sk", - "ndv": 20, + "name": "d_dom", + "ndv": 31, "minValue": 1, - "maxValue": 20 + "maxValue": 31 }, { - "name": "ws_warehouse_sk", - "ndv": 27, + "name": "d_qoy", + "ndv": 4, "minValue": 1, - "maxValue": 27 + "maxValue": 4 }, { - "name": "ws_promo_sk", - "ndv": 2301, - "minValue": 1, - "maxValue": 2300 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "ws_order_number", - "ndv": 1800000000, + "name": "d_fy_quarter_seq", + "ndv": 808, "minValue": 1, - "maxValue": 1800000000 + "maxValue": 801 }, { - "name": "ws_quantity", - "ndv": 101, + "name": "d_fy_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 100 + "maxValue": 10436 }, { - "name": "ws_wholesale_cost", - "ndv": 9382, - "minValue": 1, - "maxValue": 100 + "name": "d_day_name", + "ndv": 7 }, { - "name": "ws_list_price", - "ndv": 31080, - "minValue": 1, - "maxValue": 300 + "name": 
"d_quarter_name", + "ndv": 800 }, { - "name": "ws_sales_price", - "ndv": 31054, - "minValue": 0, - "maxValue": 300 + "name": "d_holiday", + "ndv": 2 }, { - "name": "ws_ext_sales_price", - "ndv": 1155428, - "minValue": 0, - "maxValue": 29970 + "name": "d_weekend", + "ndv": 2 }, { - "name": "ws_ext_wholesale_cost", - "ndv": 381232, - "minValue": 1, - "maxValue": 10000 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "ws_ext_list_price", - "ndv": 1208442, - "minValue": 1, - "maxValue": 30000 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "ws_ext_tax", - "ndv": 233719, - "minValue": 0, - "maxValue": 2682.9 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "ws_coupon_amt", - "ndv": 1659025, - "minValue": 0, - "maxValue": 28824 + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 }, { - "name": "ws_ext_ship_cost", - "ndv": 564319, - "minValue": 0, - "maxValue": 14950 + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 }, { - "name": "ws_net_paid", - "ndv": 1870760, - "minValue": 0, - "maxValue": 29970 + "name": "d_current_day", + "ndv": 1 }, { - "name": "ws_net_paid_inc_tax", - "ndv": 2521035, - "minValue": 0, - "maxValue": 32492.9 + "name": "d_current_week", + "ndv": 1 }, { - "name": "ws_net_paid_inc_ship", - "ndv": 2577850, - "minValue": 0, - "maxValue": 44263 + "name": "d_current_month", + "ndv": 2 }, { - "name": "ws_net_paid_inc_ship_tax", - "ndv": 3413793, - "minValue": 0, - "maxValue": 46389.84 + "name": "d_current_quarter", + "ndv": 2 }, { - "name": "ws_net_profit", - "ndv": 2074138, - "minValue": -10000, - "maxValue": 19980 + "name": "d_current_year", + "ndv": 2 } ] }, @@ -891,67 +886,101 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "AND", - "kind": "AND", - "syntax": "BINARY" + "name": "BETWEEN", + "kind": 
"BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "op": { - "name": "IS NOT NULL", - "kind": "IS_NOT_NULL", - "syntax": "POSTFIX" - }, - "operands": [ - { - "input": 21, - "name": "$21" - } - ] + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { "op": { - "name": "IS NOT NULL", - "kind": "IS_NOT_NULL", - "syntax": "POSTFIX" + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" }, "operands": [ { - "input": 33, - "name": "$33" + "input": 2, + "name": "$2" } - ] + ], + "type": { + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 890179200000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } + }, + { + "literal": 897955200000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 + } } ] }, - "rowCount": 1.7491657141260002E10 + "rowCount": 18262.25 }, { "id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "ws_item_sk", - "ws_ext_discount_amt", - "ws_sold_date_sk" + "d_date_sk" ], "exprs": [ { - "input": 2, - "name": "$2" - }, - { - "input": 21, - "name": "$21" - }, - { - "input": 33, - "name": "$33" + "input": 0, + "name": "$0" } ], - "rowCount": 1.7491657141260002E10 + "rowCount": 18262.25 }, { "id": "6", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 3, + "name": "$3" + }, + { + "input": 2, + "name": "$2" + } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "2", + "5" + ], + "rowCount": 4.791555234419632E13 + }, + { + "id": "7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", @@ -1148,13 +1177,13 @@ }, { "name": "i_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10161, "maxValue": 11622 }, { "name": "i_rec_end_date", - "ndv": 0, + "ndv": 
3, "minValue": 10891, "maxValue": 11621 }, @@ -1241,7 +1270,7 @@ ] }, { - "id": "7", + "id": "8", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -1266,7 +1295,7 @@ "rowCount": 69300 }, { - "id": "8", + "id": "9", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "i_item_sk" @@ -1280,7 +1309,7 @@ "rowCount": 69300 }, { - "id": "9", + "id": "10", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", "condition": { "op": { @@ -1290,8 +1319,8 @@ }, "operands": [ { - "input": 3, - "name": "$3" + "input": 4, + "name": "$4" }, { "input": 0, @@ -1303,13 +1332,13 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "5", - "8" + "6", + "9" ], - "rowCount": 1.8182577598339772E14 + "rowCount": 498082166617920768 }, { - "id": "10", + "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -1325,12 +1354,12 @@ ] }, "inputs": [ - "3" + "0" ], "rowCount": 1.94351746014E10 }, { - "id": "11", + "id": "12", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "ws_item_sk", @@ -1354,7 +1383,7 @@ "rowCount": 1.94351746014E10 }, { - "id": "12", + "id": "13", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -1407,12 +1436,12 @@ ] }, "inputs": [ - "0" + "3" ], "rowCount": 18262.25 }, { - "id": "13", + "id": "14", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "d_date_sk" @@ -1426,7 +1455,7 @@ "rowCount": 18262.25 }, { - "id": "14", + "id": "15", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", "condition": { "op": { @@ -1449,13 +1478,13 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "11", - "13" + "12", + "14" ], "rowCount": 5.323950260466258E13 }, { - "id": "15", + "id": "16", "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ 0 @@ -1499,7 +1528,7 @@ "rowCount": 5.323950260466258E12 }, { - "id": "16", + "id": "17", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -1545,7 +1574,7 @@ "rowCount": 4.791555234419632E12 }, { - "id": "17", + "id": "18", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "_o__c0", @@ -1610,7 +1639,7 @@ "rowCount": 4.791555234419632E12 }, { - "id": "18", + "id": "19", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", "condition": { "op": { @@ -1627,12 +1656,12 @@ }, "operands": [ { - "input": 5, - "name": "$5" + "input": 6, + "name": "$6" }, { - "input": 3, - "name": "$3" + "input": 4, + "name": "$4" } ] }, @@ -1648,8 +1677,8 @@ "name": "$1" }, { - "input": 4, - "name": "$4" + "input": 5, + "name": "$5" } ] } @@ -1659,36 +1688,7 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "9", - "17" - ], - "rowCount": 6.534211864992455E25 - }, - { - "id": "19", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 0, - "name": "$0" - }, - { - "input": 3, - "name": "$3" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "2", + "10", "18" ], "rowCount": 1.789941159471877E29 @@ -1712,7 +1712,7 @@ }, "distinct": false, "operands": [ - 2 + 1 ], "name": null } diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query94.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query94.q.out index 58e1f06e35c9..01291f85f99e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query94.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query94.q.out @@ -4,6 +4,259 @@ { "id": "0", "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", + "table": [ + "default", + "customer_address" + ], + "table:alias": "customer_address", + "inputs": [], + "rowCount": 40000000, + "avgRowSize": 607, + "rowType": { + "fields": [ + { + "type": "BIGINT", + "nullable": false, + "name": "ca_address_sk" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 16, + "name": "ca_address_id" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_street_number" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 60, + "name": "ca_street_name" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 15, + "name": "ca_street_type" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_suite_number" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 60, + "name": "ca_city" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 30, + "name": "ca_county" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 2, + "name": "ca_state" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_zip" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 20, + "name": "ca_country" + }, + { + "type": "DECIMAL", + "nullable": true, + "precision": 5, + "scale": 2, + "name": "ca_gmt_offset" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 20, + "name": "ca_location_type" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "BLOCK__OFFSET__INSIDE__FILE" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 2147483647, + "name": "INPUT__FILE__NAME" + }, + { + "fields": [ + { + "type": "BIGINT", + "nullable": true, + "name": "writeid" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "bucketid" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "rowid" + } + ], + "nullable": true, + "name": "ROW__ID" + }, + { + "type": "BOOLEAN", + "nullable": true, + "name": "ROW__IS__DELETED" + } + ], + 
"nullable": false + }, + "colStats": [ + { + "name": "ca_address_sk", + "ndv": 40618307, + "minValue": 1, + "maxValue": 40000000 + }, + { + "name": "ca_state", + "ndv": 53 + }, + { + "name": "ca_address_id", + "ndv": 39667899 + }, + { + "name": "ca_street_number", + "ndv": 1014 + }, + { + "name": "ca_street_name", + "ndv": 8358 + }, + { + "name": "ca_street_type", + "ndv": 21 + }, + { + "name": "ca_suite_number", + "ndv": 76 + }, + { + "name": "ca_city", + "ndv": 985 + }, + { + "name": "ca_county", + "ndv": 1930 + }, + { + "name": "ca_zip", + "ndv": 9538 + }, + { + "name": "ca_country", + "ndv": 2 + }, + { + "name": "ca_gmt_offset", + "ndv": 6, + "minValue": -10, + "maxValue": -5 + }, + { + "name": "ca_location_type", + "ndv": 4 + } + ] + }, + { + "id": "1", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 8, + "name": "$8" + }, + { + "literal": "TX", + "type": { + "type": "CHAR", + "nullable": false, + "precision": 2 + } + } + ] + }, + "rowCount": 6000000 + }, + { + "id": "2", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", + "fields": [ + "ca_address_sk", + "ca_state" + ], + "exprs": [ + { + "input": 0, + "name": "$0" + }, + { + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": "TX", + "type": { + "type": "CHAR", + "nullable": false, + "precision": 2 + } + } + ], + "type": { + "type": "CHAR", + "nullable": true, + "precision": 2 + } + } + ], + "rowCount": 6000000 + }, + { + "id": "3", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", "web_sales" @@ -465,7 +718,7 @@ ] }, { - "id": "1", + "id": "4", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -518,7 +771,7 @@ "rowCount": 1.5746426616519003E10 }, { - "id": "2", + 
"id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "ws_ship_date_sk", @@ -562,95 +815,168 @@ "rowCount": 1.5746426616519003E10 }, { - "id": "3", + "id": "6", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "customer_address" + "date_dim" ], - "table:alias": "customer_address", + "table:alias": "date_dim", "inputs": [], - "rowCount": 40000000, - "avgRowSize": 607, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "ca_address_sk" + "name": "d_date_sk" + }, + { + "type": "VARCHAR", + "nullable": false, + "precision": 2147483647, + "name": "d_date_id" + }, + { + "type": "DATE", + "nullable": true, + "name": "d_date" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_month_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_week_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_quarter_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_year" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_dow" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_moy" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_qoy" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_year" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_quarter_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 16, - "name": "ca_address_id" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_street_number" + "precision": 6, + "name": "d_quarter_name" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 60, - "name": "ca_street_name" + "precision": 1, + "name": "d_holiday" }, { 
"type": "CHAR", "nullable": true, - "precision": 15, - "name": "ca_street_type" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_suite_number" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 60, - "name": "ca_city" + "name": "d_first_dom" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 30, - "name": "ca_county" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 2, - "name": "ca_state" + "precision": 1, + "name": "d_current_day" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_zip" + "precision": 1, + "name": "d_current_week" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 20, - "name": "ca_country" + "precision": 1, + "name": "d_current_month" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 5, - "scale": 2, - "name": "ca_gmt_offset" + "precision": 1, + "name": "d_current_quarter" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "ca_location_type" + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -694,516 +1020,214 @@ }, "colStats": [ { - "name": "ca_address_sk", - "ndv": 40618307, + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 + }, + { + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 + }, + { + "name": "d_date_id", + "ndv": 71022 + }, + { + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 + }, + { + "name": "d_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 40000000 + "maxValue": 10436 }, { - "name": "ca_state", - "ndv": 53 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, 
{ - "name": "ca_address_id", - "ndv": 39667899 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "ca_street_number", - "ndv": 1014 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "ca_street_name", - "ndv": 8358 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + "maxValue": 12 }, { - "name": "ca_street_type", - "ndv": 21 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 }, { - "name": "ca_suite_number", - "ndv": 76 + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "ca_city", - "ndv": 985 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "ca_county", - "ndv": 1930 + "name": "d_fy_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "ca_zip", - "ndv": 9538 + "name": "d_fy_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "ca_country", - "ndv": 2 + "name": "d_day_name", + "ndv": 7 }, { - "name": "ca_gmt_offset", - "ndv": 6, - "minValue": -10, - "maxValue": -5 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "ca_location_type", - "ndv": 4 - } - ] - }, - { - "id": "4", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 8, - "name": "$8" - }, - { - "literal": "TX", - "type": { - "type": "CHAR", - "nullable": false, - "precision": 2 - } - } - ] - }, - "rowCount": 6000000 - }, - { - "id": "5", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", - "fields": [ - "ca_address_sk", - "ca_state" - ], - "exprs": [ - { - "input": 0, - "name": "$0" + "name": "d_holiday", + "ndv": 2 }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "literal": "TX", - "type": { - "type": "CHAR", - "nullable": false, - "precision": 2 - } - } - ], - "type": { - 
"type": "CHAR", - "nullable": true, - "precision": 2 - } - } - ], - "rowCount": 6000000 - }, - { - "id": "6", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 1, - "name": "$1" - }, - { - "input": 7, - "name": "$7" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "2", - "5" - ], - "rowCount": 14171783954867102 - }, - { - "id": "7", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", - "table": [ - "default", - "web_site" - ], - "table:alias": "web_site", - "inputs": [], - "rowCount": 84, - "avgRowSize": 1331, - "rowType": { - "fields": [ - { - "type": "BIGINT", - "nullable": false, - "name": "web_site_sk" - }, - { - "type": "VARCHAR", - "nullable": false, - "precision": 2147483647, - "name": "web_site_id" - }, - { - "type": "DATE", - "nullable": true, - "name": "web_rec_start_date" - }, - { - "type": "DATE", - "nullable": true, - "name": "web_rec_end_date" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "web_name" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "web_open_date_sk" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "web_close_date_sk" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "web_class" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 40, - "name": "web_manager" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "web_mkt_id" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "web_mkt_class" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 100, - "name": "web_mkt_desc" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 40, - "name": "web_market_manager" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "web_company_id" - }, - { - "type": 
"CHAR", - "nullable": true, - "precision": 50, - "name": "web_company_name" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "web_street_number" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 60, - "name": "web_street_name" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 15, - "name": "web_street_type" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "web_suite_number" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 60, - "name": "web_city" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 30, - "name": "web_county" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 2, - "name": "web_state" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "web_zip" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 20, - "name": "web_country" - }, - { - "type": "DECIMAL", - "nullable": true, - "precision": 5, - "scale": 2, - "name": "web_gmt_offset" - }, - { - "type": "DECIMAL", - "nullable": true, - "precision": 5, - "scale": 2, - "name": "web_tax_percentage" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "BLOCK__OFFSET__INSIDE__FILE" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 2147483647, - "name": "INPUT__FILE__NAME" - }, - { - "fields": [ - { - "type": "BIGINT", - "nullable": true, - "name": "writeid" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "bucketid" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "rowid" - } - ], - "nullable": true, - "name": "ROW__ID" - }, - { - "type": "BOOLEAN", - "nullable": true, - "name": "ROW__IS__DELETED" - } - ], - "nullable": false - }, - "colStats": [ - { - "name": "web_site_sk", - "ndv": 84, - "minValue": 1, - "maxValue": 84 - }, - { - "name": "web_company_name", - "ndv": 7 - }, - { - "name": "web_site_id", - "ndv": 42 - }, - { - "name": "web_rec_start_date", - "ndv": 0, - "minValue": 10089, - "maxValue": 11550 - }, - 
{ - "name": "web_rec_end_date", - "ndv": 0, - "minValue": 10819, - "maxValue": 11549 - }, - { - "name": "web_name", - "ndv": 15 - }, - { - "name": "web_open_date_sk", - "ndv": 42, - "minValue": 2450118, - "maxValue": 2450807 - }, - { - "name": "web_close_date_sk", - "ndv": 28, - "minValue": 2440993, - "maxValue": 2446218 - }, - { - "name": "web_class", + "name": "d_weekend", "ndv": 2 }, { - "name": "web_manager", - "ndv": 60 - }, - { - "name": "web_mkt_id", - "ndv": 6, - "minValue": 1, - "maxValue": 6 - }, - { - "name": "web_mkt_class", - "ndv": 65 - }, - { - "name": "web_mkt_desc", - "ndv": 64 - }, - { - "name": "web_market_manager", - "ndv": 66 - }, - { - "name": "web_company_id", - "ndv": 6, - "minValue": 1, - "maxValue": 6 - }, - { - "name": "web_street_number", - "ndv": 58 - }, - { - "name": "web_street_name", - "ndv": 80 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "web_street_type", - "ndv": 20 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "web_suite_number", - "ndv": 51 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "web_city", - "ndv": 52 + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 }, { - "name": "web_county", - "ndv": 58 + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 }, { - "name": "web_state", - "ndv": 30 + "name": "d_current_day", + "ndv": 1 }, { - "name": "web_zip", - "ndv": 56 + "name": "d_current_week", + "ndv": 1 }, { - "name": "web_country", + "name": "d_current_month", "ndv": 2 }, { - "name": "web_gmt_offset", - "ndv": 4, - "minValue": -8, - "maxValue": -5 + "name": "d_current_quarter", + "ndv": 2 }, { - "name": "web_tax_percentage", - "ndv": 13, - "minValue": 0, - "maxValue": 0.12 + "name": "d_current_year", + "ndv": 2 } ] }, { - "id": "8", + "id": "7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": 
{ "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "input": 14, - "name": "$14" + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "literal": "pri ", + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "CHAR", + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 925516800000, + "type": { + "type": "TIMESTAMP", "nullable": false, - "precision": 50 + "precision": 9 + } + }, + { + "literal": 930700800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 } } ] }, - "rowCount": 12.6 + "rowCount": 18262.25 }, { - "id": "9", + "id": "8", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "web_site_sk", - "web_company_name" + "d_date_sk", + "d_date" ], "exprs": [ { @@ -1211,29 +1235,40 @@ "name": "$0" }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" + "input": 2, + "name": "$2" + } + ], + "rowCount": 18262.25 + }, + { + "id": "9", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 0, + "name": "$0" }, - "operands": [ - { - "literal": "pri ", - "type": { - "type": "CHAR", - "nullable": false, - "precision": 50 - } - } - ], - "type": { - "type": "CHAR", - "nullable": true, - "precision": 50 + { + "input": 7, + "name": "$7" } - } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "5", + "8" ], - "rowCount": 12.6 + "rowCount": 4.3134776921628625E13 }, { "id": "10", @@ -1246,12 +1281,12 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 3, + "name": "$3" }, { - "input": 9, - "name": "$9" + "input": 0, + "name": "$0" } ] }, @@ 
-1259,174 +1294,174 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "6", + "2", "9" ], - "rowCount": 26784671674698820 + "rowCount": 3.8821299229465756E19 }, { "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "web_site" ], - "table:alias": "date_dim", + "table:alias": "web_site", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 84, + "avgRowSize": 1331, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "web_site_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "web_site_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" + "name": "web_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_month_seq" + "name": "web_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_week_seq" + "precision": 50, + "name": "web_name" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_quarter_seq" + "name": "web_open_date_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_year" + "name": "web_close_date_sk" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_dow" + "precision": 50, + "name": "web_class" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_moy" + "precision": 40, + "name": "web_manager" }, { "type": "INTEGER", "nullable": true, - "name": "d_dom" + "name": "web_mkt_id" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_qoy" + "precision": 50, + "name": "web_mkt_class" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_fy_year" + "precision": 100, + "name": "web_mkt_desc" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 40, + "name": "web_market_manager" 
}, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "web_company_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "web_company_name" }, { "type": "CHAR", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "precision": 10, + "name": "web_street_number" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 60, + "name": "web_street_name" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 15, + "name": "web_street_type" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_first_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_last_dom" + "precision": 10, + "name": "web_suite_number" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_same_day_ly" + "precision": 60, + "name": "web_city" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_same_day_lq" + "precision": 30, + "name": "web_county" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 2, + "name": "web_state" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "web_zip" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 20, + "name": "web_country" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "precision": 5, + "scale": 2, + "name": "web_gmt_offset" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 5, + "scale": 2, + "name": "web_tax_percentage" }, { "type": "BIGINT", @@ -1470,150 +1505,126 @@ }, "colStats": [ { - "name": 
"d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 71022 + "name": "web_site_sk", + "ndv": 84, + "minValue": 1, + "maxValue": 84 }, { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 + "name": "web_company_name", + "ndv": 7 }, { - "name": "d_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "web_site_id", + "ndv": 42 }, { - "name": "d_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "web_rec_start_date", + "ndv": 4, + "minValue": 10089, + "maxValue": 11550 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "web_rec_end_date", + "ndv": 3, + "minValue": 10819, + "maxValue": 11549 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "web_name", + "ndv": 15 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "web_open_date_sk", + "ndv": 42, + "minValue": 2450118, + "maxValue": 2450807 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "web_close_date_sk", + "ndv": 28, + "minValue": 2440993, + "maxValue": 2446218 }, { - "name": "d_qoy", - "ndv": 4, - "minValue": 1, - "maxValue": 4 + "name": "web_class", + "ndv": 2 }, { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "web_manager", + "ndv": 60 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, + "name": "web_mkt_id", + "ndv": 6, "minValue": 1, - "maxValue": 801 + "maxValue": 6 }, { - "name": "d_fy_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "web_mkt_class", + "ndv": 65 }, { - "name": "d_day_name", - "ndv": 7 + "name": "web_mkt_desc", + "ndv": 64 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "web_market_manager", + "ndv": 66 }, { - "name": "d_holiday", - "ndv": 2 + "name": "web_company_id", + "ndv": 6, + 
"minValue": 1, + "maxValue": 6 }, { - "name": "d_weekend", - "ndv": 2 + "name": "web_street_number", + "ndv": 58 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "web_street_name", + "ndv": 80 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "web_street_type", + "ndv": 20 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "web_suite_number", + "ndv": 51 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "web_city", + "ndv": 52 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "web_county", + "ndv": 58 }, { - "name": "d_current_day", - "ndv": 1 + "name": "web_state", + "ndv": 30 }, { - "name": "d_current_week", - "ndv": 1 + "name": "web_zip", + "ndv": 56 }, { - "name": "d_current_month", + "name": "web_country", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "web_gmt_offset", + "ndv": 4, + "minValue": -8, + "maxValue": -5 }, { - "name": "d_current_year", - "ndv": 2 + "name": "web_tax_percentage", + "ndv": 13, + "minValue": 0, + "maxValue": 0.12 } ] }, @@ -1622,62 +1633,33 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 925516800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } + "input": 14, + "name": "$14" }, { - "literal": 930700800000, + "literal": "pri ", "type": { - "type": "TIMESTAMP", + "type": 
"CHAR", "nullable": false, - "precision": 9 + "precision": 50 } } ] }, - "rowCount": 18262.25 + "rowCount": 12.6 }, { "id": "13", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk", - "d_date" + "web_site_sk", + "web_company_name" ], "exprs": [ { @@ -1685,11 +1667,29 @@ "name": "$0" }, { - "input": 2, - "name": "$2" + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": "pri ", + "type": { + "type": "CHAR", + "nullable": false, + "precision": 50 + } + } + ], + "type": { + "type": "CHAR", + "nullable": true, + "precision": 50 + } } ], - "rowCount": 18262.25 + "rowCount": 12.6 }, { "id": "14", @@ -1702,8 +1702,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 4, + "name": "$4" }, { "input": 11, @@ -1739,14 +1739,6 @@ "web_company_name" ], "exprs": [ - { - "input": 0, - "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, { "input": 2, "name": "$2" @@ -1767,14 +1759,6 @@ "input": 6, "name": "$6" }, - { - "input": 11, - "name": "$11" - }, - { - "input": 12, - "name": "$12" - }, { "input": 7, "name": "$7" @@ -1790,6 +1774,22 @@ { "input": 10, "name": "$10" + }, + { + "input": 0, + "name": "$0" + }, + { + "input": 1, + "name": "$1" + }, + { + "input": 11, + "name": "$11" + }, + { + "input": 12, + "name": "$12" } ], "rowCount": 7.337225554369027E19 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query95.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query95.q.out index ada6d281e60b..9a271e2f0627 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query95.q.out @@ -4,6 +4,259 @@ { "id": "0", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", + "table": [ + "default", + "customer_address" + ], + "table:alias": "customer_address", + "inputs": [], + "rowCount": 40000000, + "avgRowSize": 607, + 
"rowType": { + "fields": [ + { + "type": "BIGINT", + "nullable": false, + "name": "ca_address_sk" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 16, + "name": "ca_address_id" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_street_number" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 60, + "name": "ca_street_name" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 15, + "name": "ca_street_type" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_suite_number" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 60, + "name": "ca_city" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 30, + "name": "ca_county" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 2, + "name": "ca_state" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 10, + "name": "ca_zip" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 20, + "name": "ca_country" + }, + { + "type": "DECIMAL", + "nullable": true, + "precision": 5, + "scale": 2, + "name": "ca_gmt_offset" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 20, + "name": "ca_location_type" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "BLOCK__OFFSET__INSIDE__FILE" + }, + { + "type": "VARCHAR", + "nullable": true, + "precision": 2147483647, + "name": "INPUT__FILE__NAME" + }, + { + "fields": [ + { + "type": "BIGINT", + "nullable": true, + "name": "writeid" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "bucketid" + }, + { + "type": "BIGINT", + "nullable": true, + "name": "rowid" + } + ], + "nullable": true, + "name": "ROW__ID" + }, + { + "type": "BOOLEAN", + "nullable": true, + "name": "ROW__IS__DELETED" + } + ], + "nullable": false + }, + "colStats": [ + { + "name": "ca_address_sk", + "ndv": 40618307, + "minValue": 1, + "maxValue": 40000000 + }, + { + "name": "ca_state", + "ndv": 53 + }, + { + "name": "ca_address_id", + "ndv": 39667899 
+ }, + { + "name": "ca_street_number", + "ndv": 1014 + }, + { + "name": "ca_street_name", + "ndv": 8358 + }, + { + "name": "ca_street_type", + "ndv": 21 + }, + { + "name": "ca_suite_number", + "ndv": 76 + }, + { + "name": "ca_city", + "ndv": 985 + }, + { + "name": "ca_county", + "ndv": 1930 + }, + { + "name": "ca_zip", + "ndv": 9538 + }, + { + "name": "ca_country", + "ndv": 2 + }, + { + "name": "ca_gmt_offset", + "ndv": 6, + "minValue": -10, + "maxValue": -5 + }, + { + "name": "ca_location_type", + "ndv": 4 + } + ] + }, + { + "id": "1", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 8, + "name": "$8" + }, + { + "literal": "TX", + "type": { + "type": "CHAR", + "nullable": false, + "precision": 2 + } + } + ] + }, + "rowCount": 6000000 + }, + { + "id": "2", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", + "fields": [ + "ca_address_sk", + "ca_state" + ], + "exprs": [ + { + "input": 0, + "name": "$0" + }, + { + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": "TX", + "type": { + "type": "CHAR", + "nullable": false, + "precision": 2 + } + } + ], + "type": { + "type": "CHAR", + "nullable": true, + "precision": 2 + } + } + ], + "rowCount": 6000000 + }, + { + "id": "3", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", "web_sales" @@ -465,7 +718,7 @@ ] }, { - "id": "1", + "id": "4", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { @@ -518,7 +771,7 @@ "rowCount": 1.5746426616519003E10 }, { - "id": "2", + "id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ "ws_ship_date_sk", @@ -557,95 +810,168 @@ "rowCount": 1.5746426616519003E10 }, { - "id": "3", + "id": "6", "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "customer_address" + "date_dim" ], - "table:alias": "customer_address", + "table:alias": "date_dim", "inputs": [], - "rowCount": 40000000, - "avgRowSize": 607, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "ca_address_sk" + "name": "d_date_sk" + }, + { + "type": "VARCHAR", + "nullable": false, + "precision": 2147483647, + "name": "d_date_id" + }, + { + "type": "DATE", + "nullable": true, + "name": "d_date" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_month_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_week_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_quarter_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_year" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_dow" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_moy" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_qoy" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_year" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_quarter_seq" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 16, - "name": "ca_address_id" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_street_number" + "precision": 6, + "name": "d_quarter_name" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 60, - "name": "ca_street_name" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 15, - "name": "ca_street_type" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_suite_number" + "precision": 
1, + "name": "d_following_holiday" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 60, - "name": "ca_city" + "name": "d_first_dom" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 30, - "name": "ca_county" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 2, - "name": "ca_state" + "precision": 1, + "name": "d_current_day" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "ca_zip" + "precision": 1, + "name": "d_current_week" }, { - "type": "VARCHAR", + "type": "CHAR", "nullable": true, - "precision": 20, - "name": "ca_country" + "precision": 1, + "name": "d_current_month" }, { - "type": "DECIMAL", + "type": "CHAR", "nullable": true, - "precision": 5, - "scale": 2, - "name": "ca_gmt_offset" + "precision": 1, + "name": "d_current_quarter" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "ca_location_type" + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ -689,516 +1015,214 @@ }, "colStats": [ { - "name": "ca_address_sk", - "ndv": 40618307, + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 + }, + { + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 + }, + { + "name": "d_date_id", + "ndv": 71022 + }, + { + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 + }, + { + "name": "d_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 40000000 + "maxValue": 10436 }, { - "name": "ca_state", - "ndv": 53 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "ca_address_id", - "ndv": 39667899 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "ca_street_number", - "ndv": 1014 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + 
"maxValue": 6 }, { - "name": "ca_street_name", - "ndv": 8358 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + "maxValue": 12 }, { - "name": "ca_street_type", - "ndv": 21 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 }, { - "name": "ca_suite_number", - "ndv": 76 + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "ca_city", - "ndv": 985 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "ca_county", - "ndv": 1930 + "name": "d_fy_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "ca_zip", - "ndv": 9538 + "name": "d_fy_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "ca_country", - "ndv": 2 + "name": "d_day_name", + "ndv": 7 }, { - "name": "ca_gmt_offset", - "ndv": 6, - "minValue": -10, - "maxValue": -5 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "ca_location_type", - "ndv": 4 - } - ] - }, - { - "id": "4", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", - "condition": { - "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 8, - "name": "$8" - }, - { - "literal": "TX", - "type": { - "type": "CHAR", - "nullable": false, - "precision": 2 - } - } - ] - }, - "rowCount": 6000000 - }, - { - "id": "5", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", - "fields": [ - "ca_address_sk", - "ca_state" - ], - "exprs": [ - { - "input": 0, - "name": "$0" + "name": "d_holiday", + "ndv": 2 }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "literal": "TX", - "type": { - "type": "CHAR", - "nullable": false, - "precision": 2 - } - } - ], - "type": { - "type": "CHAR", - "nullable": true, - "precision": 2 - } - } - ], - "rowCount": 6000000 - }, - { - "id": "6", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", - "condition": { - "op": { - 
"name": "=", - "kind": "EQUALS", - "syntax": "BINARY" - }, - "operands": [ - { - "input": 1, - "name": "$1" - }, - { - "input": 6, - "name": "$6" - } - ] - }, - "joinType": "inner", - "algorithm": "none", - "cost": "not available", - "inputs": [ - "2", - "5" - ], - "rowCount": 14171783954867102 - }, - { - "id": "7", - "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", - "table": [ - "default", - "web_site" - ], - "table:alias": "web_site", - "inputs": [], - "rowCount": 84, - "avgRowSize": 1331, - "rowType": { - "fields": [ - { - "type": "BIGINT", - "nullable": false, - "name": "web_site_sk" - }, - { - "type": "VARCHAR", - "nullable": false, - "precision": 2147483647, - "name": "web_site_id" - }, - { - "type": "DATE", - "nullable": true, - "name": "web_rec_start_date" - }, - { - "type": "DATE", - "nullable": true, - "name": "web_rec_end_date" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "web_name" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "web_open_date_sk" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "web_close_date_sk" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "web_class" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 40, - "name": "web_manager" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "web_mkt_id" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 50, - "name": "web_mkt_class" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 100, - "name": "web_mkt_desc" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 40, - "name": "web_market_manager" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "web_company_id" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 50, - "name": "web_company_name" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "web_street_number" - }, - { - "type": "VARCHAR", - "nullable": true, - 
"precision": 60, - "name": "web_street_name" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 15, - "name": "web_street_type" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "web_suite_number" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 60, - "name": "web_city" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 30, - "name": "web_county" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 2, - "name": "web_state" - }, - { - "type": "CHAR", - "nullable": true, - "precision": 10, - "name": "web_zip" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 20, - "name": "web_country" - }, - { - "type": "DECIMAL", - "nullable": true, - "precision": 5, - "scale": 2, - "name": "web_gmt_offset" - }, - { - "type": "DECIMAL", - "nullable": true, - "precision": 5, - "scale": 2, - "name": "web_tax_percentage" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "BLOCK__OFFSET__INSIDE__FILE" - }, - { - "type": "VARCHAR", - "nullable": true, - "precision": 2147483647, - "name": "INPUT__FILE__NAME" - }, - { - "fields": [ - { - "type": "BIGINT", - "nullable": true, - "name": "writeid" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "bucketid" - }, - { - "type": "BIGINT", - "nullable": true, - "name": "rowid" - } - ], - "nullable": true, - "name": "ROW__ID" - }, - { - "type": "BOOLEAN", - "nullable": true, - "name": "ROW__IS__DELETED" - } - ], - "nullable": false - }, - "colStats": [ - { - "name": "web_site_sk", - "ndv": 84, - "minValue": 1, - "maxValue": 84 - }, - { - "name": "web_company_name", - "ndv": 7 - }, - { - "name": "web_site_id", - "ndv": 42 - }, - { - "name": "web_rec_start_date", - "ndv": 0, - "minValue": 10089, - "maxValue": 11550 - }, - { - "name": "web_rec_end_date", - "ndv": 0, - "minValue": 10819, - "maxValue": 11549 - }, - { - "name": "web_name", - "ndv": 15 - }, - { - "name": "web_open_date_sk", - "ndv": 42, - "minValue": 2450118, - "maxValue": 
2450807 - }, - { - "name": "web_close_date_sk", - "ndv": 28, - "minValue": 2440993, - "maxValue": 2446218 - }, - { - "name": "web_class", + "name": "d_weekend", "ndv": 2 }, { - "name": "web_manager", - "ndv": 60 - }, - { - "name": "web_mkt_id", - "ndv": 6, - "minValue": 1, - "maxValue": 6 - }, - { - "name": "web_mkt_class", - "ndv": 65 - }, - { - "name": "web_mkt_desc", - "ndv": 64 - }, - { - "name": "web_market_manager", - "ndv": 66 - }, - { - "name": "web_company_id", - "ndv": 6, - "minValue": 1, - "maxValue": 6 - }, - { - "name": "web_street_number", - "ndv": 58 - }, - { - "name": "web_street_name", - "ndv": 80 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "web_street_type", - "ndv": 20 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "web_suite_number", - "ndv": 51 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "web_city", - "ndv": 52 + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 }, { - "name": "web_county", - "ndv": 58 + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 }, { - "name": "web_state", - "ndv": 30 + "name": "d_current_day", + "ndv": 1 }, { - "name": "web_zip", - "ndv": 56 + "name": "d_current_week", + "ndv": 1 }, { - "name": "web_country", + "name": "d_current_month", "ndv": 2 }, { - "name": "web_gmt_offset", - "ndv": 4, - "minValue": -8, - "maxValue": -5 + "name": "d_current_quarter", + "ndv": 2 }, - { - "name": "web_tax_percentage", - "ndv": 13, - "minValue": 0, - "maxValue": 0.12 + { + "name": "d_current_year", + "ndv": 2 } ] }, { - "id": "8", + "id": "7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "=", - "kind": "EQUALS", - "syntax": "BINARY" + "name": "BETWEEN", + "kind": "BETWEEN", + "syntax": "SPECIAL" }, "operands": [ { - "input": 14, - "name": "$14" + "literal": false, + "type": { 
+ "type": "BOOLEAN", + "nullable": false + } }, { - "literal": "pri ", + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "CHAR", + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 + } + }, + { + "literal": 925516800000, + "type": { + "type": "TIMESTAMP", "nullable": false, - "precision": 50 + "precision": 9 + } + }, + { + "literal": 930700800000, + "type": { + "type": "TIMESTAMP", + "nullable": false, + "precision": 9 } } ] }, - "rowCount": 12.6 + "rowCount": 18262.25 }, { - "id": "9", + "id": "8", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "web_site_sk", - "web_company_name" + "d_date_sk", + "d_date" ], "exprs": [ { @@ -1206,29 +1230,40 @@ "name": "$0" }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" + "input": 2, + "name": "$2" + } + ], + "rowCount": 18262.25 + }, + { + "id": "9", + "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin", + "condition": { + "op": { + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" + }, + "operands": [ + { + "input": 0, + "name": "$0" }, - "operands": [ - { - "literal": "pri ", - "type": { - "type": "CHAR", - "nullable": false, - "precision": 50 - } - } - ], - "type": { - "type": "CHAR", - "nullable": true, - "precision": 50 + { + "input": 6, + "name": "$6" } - } + ] + }, + "joinType": "inner", + "algorithm": "none", + "cost": "not available", + "inputs": [ + "5", + "8" ], - "rowCount": 12.6 + "rowCount": 4.3134776921628625E13 }, { "id": "10", @@ -1241,12 +1276,12 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 3, + "name": "$3" }, { - "input": 8, - "name": "$8" + "input": 0, + "name": "$0" } ] }, @@ -1254,174 +1289,174 @@ "algorithm": "none", "cost": "not available", "inputs": [ - "6", + "2", "9" ], - "rowCount": 26784671674698820 + "rowCount": 3.8821299229465756E19 }, { "id": "11", "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "web_site" ], - "table:alias": "date_dim", + "table:alias": "web_site", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 84, + "avgRowSize": 1331, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "web_site_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "web_site_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" + "name": "web_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_month_seq" + "name": "web_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_week_seq" + "precision": 50, + "name": "web_name" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_quarter_seq" + "name": "web_open_date_sk" }, { - "type": "INTEGER", + "type": "BIGINT", "nullable": true, - "name": "d_year" + "name": "web_close_date_sk" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_dow" + "precision": 50, + "name": "web_class" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_moy" + "precision": 40, + "name": "web_manager" }, { "type": "INTEGER", "nullable": true, - "name": "d_dom" + "name": "web_mkt_id" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_qoy" + "precision": 50, + "name": "web_mkt_class" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_fy_year" + "precision": 100, + "name": "web_mkt_desc" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 40, + "name": "web_market_manager" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "web_company_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + 
"name": "web_company_name" }, { "type": "CHAR", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "precision": 10, + "name": "web_street_number" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 60, + "name": "web_street_name" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 15, + "name": "web_street_type" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_first_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_last_dom" + "precision": 10, + "name": "web_suite_number" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_same_day_ly" + "precision": 60, + "name": "web_city" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_same_day_lq" + "precision": 30, + "name": "web_county" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 2, + "name": "web_state" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "web_zip" }, { - "type": "CHAR", + "type": "VARCHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 20, + "name": "web_country" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "precision": 5, + "scale": 2, + "name": "web_gmt_offset" }, { - "type": "CHAR", + "type": "DECIMAL", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 5, + "scale": 2, + "name": "web_tax_percentage" }, { "type": "BIGINT", @@ -1465,150 +1500,126 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 
71022 + "name": "web_site_sk", + "ndv": 84, + "minValue": 1, + "maxValue": 84 }, { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 + "name": "web_company_name", + "ndv": 7 }, { - "name": "d_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "web_site_id", + "ndv": 42 }, { - "name": "d_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "web_rec_start_date", + "ndv": 4, + "minValue": 10089, + "maxValue": 11550 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "web_rec_end_date", + "ndv": 3, + "minValue": 10819, + "maxValue": 11549 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "web_name", + "ndv": 15 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "web_open_date_sk", + "ndv": 42, + "minValue": 2450118, + "maxValue": 2450807 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "web_close_date_sk", + "ndv": 28, + "minValue": 2440993, + "maxValue": 2446218 }, { - "name": "d_qoy", - "ndv": 4, - "minValue": 1, - "maxValue": 4 + "name": "web_class", + "ndv": 2 }, { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "web_manager", + "ndv": 60 }, { - "name": "d_fy_quarter_seq", - "ndv": 808, + "name": "web_mkt_id", + "ndv": 6, "minValue": 1, - "maxValue": 801 + "maxValue": 6 }, { - "name": "d_fy_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "web_mkt_class", + "ndv": 65 }, { - "name": "d_day_name", - "ndv": 7 + "name": "web_mkt_desc", + "ndv": 64 }, { - "name": "d_quarter_name", - "ndv": 800 + "name": "web_market_manager", + "ndv": 66 }, { - "name": "d_holiday", - "ndv": 2 + "name": "web_company_id", + "ndv": 6, + "minValue": 1, + "maxValue": 6 }, { - "name": "d_weekend", - "ndv": 2 + "name": "web_street_number", + "ndv": 58 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "web_street_name", + "ndv": 80 
}, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "web_street_type", + "ndv": 20 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "web_suite_number", + "ndv": 51 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "web_city", + "ndv": 52 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "web_county", + "ndv": 58 }, { - "name": "d_current_day", - "ndv": 1 + "name": "web_state", + "ndv": 30 }, { - "name": "d_current_week", - "ndv": 1 + "name": "web_zip", + "ndv": 56 }, { - "name": "d_current_month", + "name": "web_country", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "web_gmt_offset", + "ndv": 4, + "minValue": -8, + "maxValue": -5 }, { - "name": "d_current_year", - "ndv": 2 + "name": "web_tax_percentage", + "ndv": 13, + "minValue": 0, + "maxValue": 0.12 } ] }, @@ -1617,62 +1628,33 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", - "syntax": "SPECIAL" + "name": "=", + "kind": "EQUALS", + "syntax": "BINARY" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } - }, - { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], - "type": { - "type": "TIMESTAMP", - "nullable": true, - "precision": 9 - } - }, - { - "literal": 925516800000, - "type": { - "type": "TIMESTAMP", - "nullable": false, - "precision": 9 - } + "input": 14, + "name": "$14" }, { - "literal": 930700800000, + "literal": "pri ", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 50 } } ] }, - "rowCount": 18262.25 + "rowCount": 12.6 }, { "id": "13", "relOp": 
"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk", - "d_date" + "web_site_sk", + "web_company_name" ], "exprs": [ { @@ -1680,11 +1662,29 @@ "name": "$0" }, { - "input": 2, - "name": "$2" + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "literal": "pri ", + "type": { + "type": "CHAR", + "nullable": false, + "precision": 50 + } + } + ], + "type": { + "type": "CHAR", + "nullable": true, + "precision": 50 + } } ], - "rowCount": 18262.25 + "rowCount": 12.6 }, { "id": "14", @@ -1697,8 +1697,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 4, + "name": "$4" }, { "input": 10, @@ -1733,14 +1733,6 @@ "web_company_name" ], "exprs": [ - { - "input": 0, - "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, { "input": 2, "name": "$2" @@ -1757,14 +1749,6 @@ "input": 5, "name": "$5" }, - { - "input": 10, - "name": "$10" - }, - { - "input": 11, - "name": "$11" - }, { "input": 6, "name": "$6" @@ -1780,6 +1764,22 @@ { "input": 9, "name": "$9" + }, + { + "input": 0, + "name": "$0" + }, + { + "input": 1, + "name": "$1" + }, + { + "input": 10, + "name": "$10" + }, + { + "input": 11, + "name": "$11" } ], "rowCount": 7.337225554369027E19 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query96.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query96.q.out index 6810c6736668..797c04a85f3d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query96.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query96.q.out @@ -909,13 +909,13 @@ }, { "name": "s_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 9933, "maxValue": 11394 }, { "name": "s_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10663, "maxValue": 11393 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query97.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query97.q.out index dc4608e74323..3c6ead1de18d 100644 --- 
a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query97.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query97.q.out @@ -602,7 +602,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query98.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query98.q.out index 230901e7cd30..60ab4eda30ab 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query98.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query98.q.out @@ -384,139 +384,164 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "item" + "date_dim" ], - "table:alias": "item", + "table:alias": "date_dim", "inputs": [], - "rowCount": 462000, - "avgRowSize": 1033, + "rowCount": 73049, + "avgRowSize": 347, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "i_item_sk" + "name": "d_date_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "i_item_id" + "name": "d_date_id" }, { "type": "DATE", "nullable": true, - "name": "i_rec_start_date" + "name": "d_date" }, { - "type": "DATE", + "type": "INTEGER", "nullable": true, - "name": "i_rec_end_date" + "name": "d_month_seq" }, { - "type": "VARCHAR", + "type": "INTEGER", "nullable": true, - "precision": 200, - "name": "i_item_desc" + "name": "d_week_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_current_price" + "name": "d_quarter_seq" }, { - "type": "DECIMAL", + "type": "INTEGER", "nullable": true, - "precision": 7, - "scale": 2, - "name": "i_wholesale_cost" + "name": "d_year" }, { "type": "INTEGER", "nullable": true, - "name": "i_brand_id" + "name": "d_dow" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_brand" + "name": "d_moy" }, { "type": "INTEGER", "nullable": 
true, - "name": "i_class_id" + "name": "d_dom" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_class" + "name": "d_qoy" }, { "type": "INTEGER", "nullable": true, - "name": "i_category_id" + "name": "d_fy_year" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 50, - "name": "i_category" + "name": "d_fy_quarter_seq" }, { "type": "INTEGER", "nullable": true, - "name": "i_manufact_id" + "name": "d_fy_week_seq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_manufact" + "precision": 9, + "name": "d_day_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_size" + "precision": 6, + "name": "d_quarter_name" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_formulation" + "precision": 1, + "name": "d_holiday" }, { "type": "CHAR", "nullable": true, - "precision": 20, - "name": "i_color" + "precision": 1, + "name": "d_weekend" }, { "type": "CHAR", "nullable": true, - "precision": 10, - "name": "i_units" + "precision": 1, + "name": "d_following_holiday" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 10, - "name": "i_container" + "name": "d_first_dom" }, { "type": "INTEGER", "nullable": true, - "name": "i_manager_id" + "name": "d_last_dom" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_ly" + }, + { + "type": "INTEGER", + "nullable": true, + "name": "d_same_day_lq" }, { "type": "CHAR", "nullable": true, - "precision": 50, - "name": "i_product_name" + "precision": 1, + "name": "d_current_day" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_week" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_month" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_quarter" + }, + { + "type": "CHAR", + "nullable": true, + "precision": 1, + "name": "d_current_year" }, { "type": "BIGINT", @@ 
-560,195 +585,221 @@ }, "colStats": [ { - "name": "i_item_sk", - "ndv": 464811, - "minValue": 1, - "maxValue": 462000 + "name": "d_date_sk", + "ndv": 67850, + "minValue": 2415022, + "maxValue": 2488070 }, { - "name": "i_item_id", - "ndv": 247524 + "name": "d_date", + "ndv": 76511, + "minValue": -25566, + "maxValue": 47482 }, { - "name": "i_item_desc", - "ndv": 341846 + "name": "d_date_id", + "ndv": 71022 }, { - "name": "i_current_price", - "ndv": 9391, - "minValue": 0.09, - "maxValue": 99.99 + "name": "d_month_seq", + "ndv": 2439, + "minValue": 0, + "maxValue": 2400 }, { - "name": "i_class", - "ndv": 99 + "name": "d_week_seq", + "ndv": 11297, + "minValue": 1, + "maxValue": 10436 }, { - "name": "i_category", - "ndv": 11 + "name": "d_quarter_seq", + "ndv": 808, + "minValue": 1, + "maxValue": 801 }, { - "name": "i_rec_start_date", - "ndv": 0, - "minValue": 10161, - "maxValue": 11622 + "name": "d_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_rec_end_date", - "ndv": 0, - "minValue": 10891, - "maxValue": 11621 + "name": "d_dow", + "ndv": 7, + "minValue": 0, + "maxValue": 6 }, { - "name": "i_wholesale_cost", - "ndv": 7343, - "minValue": 0.02, - "maxValue": 89.74 + "name": "d_moy", + "ndv": 12, + "minValue": 1, + "maxValue": 12 }, { - "name": "i_brand_id", - "ndv": 962, - "minValue": 1001001, - "maxValue": 10016017 + "name": "d_dom", + "ndv": 31, + "minValue": 1, + "maxValue": 31 }, { - "name": "i_brand", - "ndv": 742 + "name": "d_qoy", + "ndv": 4, + "minValue": 1, + "maxValue": 4 }, { - "name": "i_class_id", - "ndv": 16, - "minValue": 1, - "maxValue": 16 + "name": "d_fy_year", + "ndv": 199, + "minValue": 1900, + "maxValue": 2100 }, { - "name": "i_category_id", - "ndv": 10, + "name": "d_fy_quarter_seq", + "ndv": 808, "minValue": 1, - "maxValue": 10 + "maxValue": 801 }, { - "name": "i_manufact_id", - "ndv": 987, + "name": "d_fy_week_seq", + "ndv": 11297, "minValue": 1, - "maxValue": 1000 + "maxValue": 10436 }, { - "name": "i_manufact", - 
"ndv": 1004 + "name": "d_day_name", + "ndv": 7 }, { - "name": "i_size", - "ndv": 8 + "name": "d_quarter_name", + "ndv": 800 }, { - "name": "i_formulation", - "ndv": 344236 + "name": "d_holiday", + "ndv": 2 }, { - "name": "i_color", - "ndv": 95 + "name": "d_weekend", + "ndv": 2 }, { - "name": "i_units", - "ndv": 21 + "name": "d_following_holiday", + "ndv": 2 }, { - "name": "i_container", - "ndv": 2 + "name": "d_first_dom", + "ndv": 2332, + "minValue": 2415021, + "maxValue": 2488070 }, { - "name": "i_manager_id", - "ndv": 104, - "minValue": 1, - "maxValue": 100 + "name": "d_last_dom", + "ndv": 2401, + "minValue": 2415020, + "maxValue": 2488372 }, { - "name": "i_product_name", - "ndv": 461487 - } - ] - }, - { + "name": "d_same_day_ly", + "ndv": 67791, + "minValue": 2414657, + "maxValue": 2487705 + }, + { + "name": "d_same_day_lq", + "ndv": 67904, + "minValue": 2414930, + "maxValue": 2487978 + }, + { + "name": "d_current_day", + "ndv": 1 + }, + { + "name": "d_current_week", + "ndv": 1 + }, + { + "name": "d_current_month", + "ndv": 2 + }, + { + "name": "d_current_quarter", + "ndv": 2 + }, + { + "name": "d_current_year", + "ndv": 2 + } + ] + }, + { "id": "4", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "IN", - "kind": "OTHER_FUNCTION", + "name": "BETWEEN", + "kind": "BETWEEN", "syntax": "SPECIAL" }, "operands": [ { - "input": 12, - "name": "$12" + "literal": false, + "type": { + "type": "BOOLEAN", + "nullable": false + } }, { - "literal": "Books", + "op": { + "name": "CAST", + "kind": "CAST", + "syntax": "SPECIAL" + }, + "operands": [ + { + "input": 2, + "name": "$2" + } + ], "type": { - "type": "CHAR", - "nullable": false, - "precision": 5 + "type": "TIMESTAMP", + "nullable": true, + "precision": 9 } }, { - "literal": "Jewelry", + "literal": 979257600000, "type": { - "type": "CHAR", + "type": "TIMESTAMP", "nullable": false, - "precision": 7 + "precision": 9 } }, { - "literal": "Sports", + "literal": 
981849600000, "type": { - "type": "CHAR", + "type": "TIMESTAMP", "nullable": false, - "precision": 6 + "precision": 9 } } ] }, - "rowCount": 115500 + "rowCount": 18262.25 }, { "id": "5", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "i_item_sk", - "i_item_id", - "i_item_desc", - "i_current_price", - "i_class", - "i_category" + "d_date_sk" ], "exprs": [ { "input": 0, "name": "$0" - }, - { - "input": 1, - "name": "$1" - }, - { - "input": 4, - "name": "$4" - }, - { - "input": 5, - "name": "$5" - }, - { - "input": 10, - "name": "$10" - }, - { - "input": 12, - "name": "$12" } ], - "rowCount": 115500 + "rowCount": 18262.25 }, { "id": "6", @@ -761,8 +812,8 @@ }, "operands": [ { - "input": 0, - "name": "$0" + "input": 2, + "name": "$2" }, { "input": 3, @@ -777,171 +828,146 @@ "2", "5" ], - "rowCount": 1.2865508954488575E15 + "rowCount": 2.0342263281741038E14 }, { "id": "7", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan", "table": [ "default", - "date_dim" + "item" ], - "table:alias": "date_dim", + "table:alias": "item", "inputs": [], - "rowCount": 73049, - "avgRowSize": 347, + "rowCount": 462000, + "avgRowSize": 1033, "rowType": { "fields": [ { "type": "BIGINT", "nullable": false, - "name": "d_date_sk" + "name": "i_item_sk" }, { "type": "VARCHAR", "nullable": false, "precision": 2147483647, - "name": "d_date_id" + "name": "i_item_id" }, { "type": "DATE", "nullable": true, - "name": "d_date" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_month_seq" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_week_seq" + "name": "i_rec_start_date" }, { - "type": "INTEGER", + "type": "DATE", "nullable": true, - "name": "d_quarter_seq" + "name": "i_rec_end_date" }, { - "type": "INTEGER", + "type": "VARCHAR", "nullable": true, - "name": "d_year" + "precision": 200, + "name": "i_item_desc" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_dow" + 
"precision": 7, + "scale": 2, + "name": "i_current_price" }, { - "type": "INTEGER", + "type": "DECIMAL", "nullable": true, - "name": "d_moy" + "precision": 7, + "scale": 2, + "name": "i_wholesale_cost" }, { "type": "INTEGER", "nullable": true, - "name": "d_dom" + "name": "i_brand_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_qoy" + "precision": 50, + "name": "i_brand" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_year" + "name": "i_class_id" }, { - "type": "INTEGER", + "type": "CHAR", "nullable": true, - "name": "d_fy_quarter_seq" + "precision": 50, + "name": "i_class" }, { "type": "INTEGER", "nullable": true, - "name": "d_fy_week_seq" + "name": "i_category_id" }, { "type": "CHAR", "nullable": true, - "precision": 9, - "name": "d_day_name" + "precision": 50, + "name": "i_category" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 6, - "name": "d_quarter_name" + "name": "i_manufact_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_holiday" + "precision": 50, + "name": "i_manufact" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_weekend" + "precision": 20, + "name": "i_size" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_following_holiday" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_first_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_last_dom" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_ly" - }, - { - "type": "INTEGER", - "nullable": true, - "name": "d_same_day_lq" + "precision": 20, + "name": "i_formulation" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_day" + "precision": 20, + "name": "i_color" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_week" + "precision": 10, + "name": "i_units" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_month" + "precision": 10, + "name": 
"i_container" }, { - "type": "CHAR", + "type": "INTEGER", "nullable": true, - "precision": 1, - "name": "d_current_quarter" + "name": "i_manager_id" }, { "type": "CHAR", "nullable": true, - "precision": 1, - "name": "d_current_year" + "precision": 50, + "name": "i_product_name" }, { "type": "BIGINT", @@ -985,150 +1011,112 @@ }, "colStats": [ { - "name": "d_date_sk", - "ndv": 67850, - "minValue": 2415022, - "maxValue": 2488070 - }, - { - "name": "d_date", - "ndv": 0, - "minValue": -25566, - "maxValue": 47482 - }, - { - "name": "d_date_id", - "ndv": 71022 - }, - { - "name": "d_month_seq", - "ndv": 2439, - "minValue": 0, - "maxValue": 2400 - }, - { - "name": "d_week_seq", - "ndv": 11297, + "name": "i_item_sk", + "ndv": 464811, "minValue": 1, - "maxValue": 10436 + "maxValue": 462000 }, { - "name": "d_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "name": "i_item_id", + "ndv": 247524 }, { - "name": "d_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 + "name": "i_item_desc", + "ndv": 341846 }, { - "name": "d_dow", - "ndv": 7, - "minValue": 0, - "maxValue": 6 + "name": "i_current_price", + "ndv": 9391, + "minValue": 0.09, + "maxValue": 99.99 }, { - "name": "d_moy", - "ndv": 12, - "minValue": 1, - "maxValue": 12 + "name": "i_class", + "ndv": 99 }, { - "name": "d_dom", - "ndv": 31, - "minValue": 1, - "maxValue": 31 + "name": "i_category", + "ndv": 11 }, { - "name": "d_qoy", + "name": "i_rec_start_date", "ndv": 4, - "minValue": 1, - "maxValue": 4 - }, - { - "name": "d_fy_year", - "ndv": 199, - "minValue": 1900, - "maxValue": 2100 - }, - { - "name": "d_fy_quarter_seq", - "ndv": 808, - "minValue": 1, - "maxValue": 801 + "minValue": 10161, + "maxValue": 11622 }, { - "name": "d_fy_week_seq", - "ndv": 11297, - "minValue": 1, - "maxValue": 10436 + "name": "i_rec_end_date", + "ndv": 3, + "minValue": 10891, + "maxValue": 11621 }, { - "name": "d_day_name", - "ndv": 7 + "name": "i_wholesale_cost", + "ndv": 7343, + "minValue": 0.02, + "maxValue": 89.74 }, { 
- "name": "d_quarter_name", - "ndv": 800 + "name": "i_brand_id", + "ndv": 962, + "minValue": 1001001, + "maxValue": 10016017 }, { - "name": "d_holiday", - "ndv": 2 + "name": "i_brand", + "ndv": 742 }, { - "name": "d_weekend", - "ndv": 2 + "name": "i_class_id", + "ndv": 16, + "minValue": 1, + "maxValue": 16 }, { - "name": "d_following_holiday", - "ndv": 2 + "name": "i_category_id", + "ndv": 10, + "minValue": 1, + "maxValue": 10 }, { - "name": "d_first_dom", - "ndv": 2332, - "minValue": 2415021, - "maxValue": 2488070 + "name": "i_manufact_id", + "ndv": 987, + "minValue": 1, + "maxValue": 1000 }, { - "name": "d_last_dom", - "ndv": 2401, - "minValue": 2415020, - "maxValue": 2488372 + "name": "i_manufact", + "ndv": 1004 }, { - "name": "d_same_day_ly", - "ndv": 67791, - "minValue": 2414657, - "maxValue": 2487705 + "name": "i_size", + "ndv": 8 }, { - "name": "d_same_day_lq", - "ndv": 67904, - "minValue": 2414930, - "maxValue": 2487978 + "name": "i_formulation", + "ndv": 344236 }, { - "name": "d_current_day", - "ndv": 1 + "name": "i_color", + "ndv": 95 }, { - "name": "d_current_week", - "ndv": 1 + "name": "i_units", + "ndv": 21 }, { - "name": "d_current_month", + "name": "i_container", "ndv": 2 }, { - "name": "d_current_quarter", - "ndv": 2 + "name": "i_manager_id", + "ndv": 104, + "minValue": 1, + "maxValue": 100 }, { - "name": "d_current_year", - "ndv": 2 + "name": "i_product_name", + "ndv": 461487 } ] }, @@ -1137,69 +1125,81 @@ "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter", "condition": { "op": { - "name": "BETWEEN", - "kind": "BETWEEN", + "name": "IN", + "kind": "OTHER_FUNCTION", "syntax": "SPECIAL" }, "operands": [ { - "literal": false, - "type": { - "type": "BOOLEAN", - "nullable": false - } + "input": 12, + "name": "$12" }, { - "op": { - "name": "CAST", - "kind": "CAST", - "syntax": "SPECIAL" - }, - "operands": [ - { - "input": 2, - "name": "$2" - } - ], + "literal": "Books", "type": { - "type": "TIMESTAMP", - "nullable": true, - 
"precision": 9 + "type": "CHAR", + "nullable": false, + "precision": 5 } }, { - "literal": 979257600000, + "literal": "Jewelry", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 7 } }, { - "literal": 981849600000, + "literal": "Sports", "type": { - "type": "TIMESTAMP", + "type": "CHAR", "nullable": false, - "precision": 9 + "precision": 6 } } ] }, - "rowCount": 18262.25 + "rowCount": 115500 }, { "id": "9", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject", "fields": [ - "d_date_sk" + "i_item_sk", + "i_item_id", + "i_item_desc", + "i_current_price", + "i_class", + "i_category" ], "exprs": [ { "input": 0, "name": "$0" + }, + { + "input": 1, + "name": "$1" + }, + { + "input": 4, + "name": "$4" + }, + { + "input": 5, + "name": "$5" + }, + { + "input": 10, + "name": "$10" + }, + { + "input": 12, + "name": "$12" } ], - "rowCount": 18262.25 + "rowCount": 115500 }, { "id": "10", @@ -1212,12 +1212,12 @@ }, "operands": [ { - "input": 2, - "name": "$2" + "input": 0, + "name": "$0" }, { - "input": 9, - "name": "$9" + "input": 4, + "name": "$4" } ] }, @@ -1234,11 +1234,11 @@ "id": "11", "relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate", "group": [ - 4, 5, 6, 7, - 8 + 8, + 9 ], "aggs": [ { diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query99.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query99.q.out index 9d332c859bc6..d589e67ac45c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query99.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/json/query99.q.out @@ -1258,7 +1258,7 @@ }, { "name": "d_date", - "ndv": 0, + "ndv": 76511, "minValue": -25566, "maxValue": 47482 }, @@ -2163,13 +2163,13 @@ }, { "name": "cc_rec_start_date", - "ndv": 0, + "ndv": 4, "minValue": 10227, "maxValue": 11688 }, { "name": "cc_rec_end_date", - "ndv": 0, + "ndv": 3, "minValue": 10957, "maxValue": 11687 }, diff --git 
a/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out b/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out index 7fd56312c5af..8194bfa2db6f 100644 --- a/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out +++ b/ql/src/test/results/clientpositive/tez/flatten_union_subdir.q.out @@ -60,7 +60,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_nonacid_directinsert_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -112,8 +112,8 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: default@union_target_nonacid_directinsert_flattened POSTHOOK: Output: default@union_target_nonacid_directinsert_flattened@dt=20230817 POSTHOOK: Lineage: union_target_nonacid_directinsert_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_nonacid_directinsert_flattened)union_target_nonacid_directinsert_flattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_nonacid_directinsert_flattened PREHOOK: type: QUERY PREHOOK: Input: default@union_target_nonacid_directinsert_flattened @@ -171,7 +171,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_mm_directinsert_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 
width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -223,9 +223,9 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: default@union_target_mm_directinsert_flattened POSTHOOK: Output: default@union_target_mm_directinsert_flattened@dt=20230817 POSTHOOK: Lineage: union_target_mm_directinsert_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_mm_directinsert_flattened)union_target_mm_directinsert_flattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_mm_directinsert_flattened PREHOOK: type: QUERY PREHOOK: Input: default@union_target_mm_directinsert_flattened @@ -283,7 +283,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_acid_directinsert_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -335,10 +335,10 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: default@union_target_acid_directinsert_flattened POSTHOOK: Output: default@union_target_acid_directinsert_flattened@dt=20230817 POSTHOOK: Lineage: union_target_acid_directinsert_flattened 
PARTITION(dt=20230817).val EXPRESSION [(union_target_acid_directinsert_flattened)union_target_acid_directinsert_flattened.FieldSchema(name:val, type:string, comment:null), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_acid_directinsert_flattened PREHOOK: type: QUERY PREHOOK: Input: default@union_target_acid_directinsert_flattened @@ -396,7 +396,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_mm_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -448,9 +448,9 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: default@union_target_mm_flattened POSTHOOK: Output: default@union_target_mm_flattened@dt=20230817 POSTHOOK: Lineage: union_target_mm_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_mm_flattened)union_target_mm_flattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE 
### hdfs://### HDFS PATH ### -drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_mm_flattened PREHOOK: type: QUERY PREHOOK: Input: default@union_target_mm_flattened @@ -508,7 +508,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_acid_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -560,10 +560,10 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: default@union_target_acid_flattened POSTHOOK: Output: default@union_target_acid_flattened@dt=20230817 POSTHOOK: Lineage: union_target_acid_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_acid_flattened)union_target_acid_flattened.FieldSchema(name:val, type:string, comment:null), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### 
+-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_acid_flattened PREHOOK: type: QUERY PREHOOK: Input: default@union_target_acid_flattened @@ -621,7 +621,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_mm_unflattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -673,10 +673,10 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: default@union_target_mm_unflattened POSTHOOK: Output: default@union_target_mm_unflattened@dt=20230817 POSTHOOK: Lineage: union_target_mm_unflattened PARTITION(dt=20230817).val EXPRESSION [(union_target_mm_unflattened)union_target_mm_unflattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_mm_unflattened PREHOOK: type: QUERY PREHOOK: Input: 
default@union_target_mm_unflattened @@ -734,7 +734,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_acid_unflattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -786,10 +786,10 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: default@union_target_acid_unflattened POSTHOOK: Output: default@union_target_acid_unflattened@dt=20230817 POSTHOOK: Lineage: union_target_acid_unflattened PARTITION(dt=20230817).val EXPRESSION [(union_target_acid_unflattened)union_target_acid_unflattened.FieldSchema(name:val, type:string, comment:null), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_acid_unflattened PREHOOK: type: QUERY PREHOOK: Input: default@union_target_acid_unflattened @@ -847,7 +847,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_mm_directinsert_unflattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 
width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -899,10 +899,10 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: default@union_target_mm_directinsert_unflattened POSTHOOK: Output: default@union_target_mm_directinsert_unflattened@dt=20230817 POSTHOOK: Lineage: union_target_mm_directinsert_unflattened PARTITION(dt=20230817).val EXPRESSION [(union_target_mm_directinsert_unflattened)union_target_mm_directinsert_unflattened.FieldSchema(name:val, type:string, comment:), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxrwxrwx - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxrwxrwx - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_mm_directinsert_unflattened PREHOOK: type: QUERY PREHOOK: Input: default@union_target_mm_directinsert_unflattened @@ -960,7 +960,7 @@ Stage-3 <-Map 1 [CONTAINS] vectorized File Output Operator [FS_57] table:{"name:":"default.union_target_acid_nondirectinsert_flattened"} - Map Join Operator [MAPJOIN_56] (rows=2 width=96) + Map Join Operator [MAPJOIN_56] (rows=2 width=84) Conds:SEL_55._col0=RS_53._col0(Left Outer),Output:["_col0"] <-Map 4 [BROADCAST_EDGE] vectorized BROADCAST [RS_53] @@ -1012,10 +1012,10 @@ POSTHOOK: Input: default@test1@dt=20230817 POSTHOOK: Input: 
default@union_target_acid_nondirectinsert_flattened POSTHOOK: Output: default@union_target_acid_nondirectinsert_flattened@dt=20230817 POSTHOOK: Lineage: union_target_acid_nondirectinsert_flattened PARTITION(dt=20230817).val EXPRESSION [(union_target_acid_nondirectinsert_flattened)union_target_acid_nondirectinsert_flattened.FieldSchema(name:val, type:string, comment:null), (test1)test1.FieldSchema(name:val, type:string, comment:), ] -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### -drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### --rw-rw-rw- 3 ### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +drwxr-xr-x - konstantin.### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### +-rw-rw-rw- 3 konstantin.### USER ### ### GROUP ### ### SIZE ### ### HDFS DATE ### hdfs://### HDFS PATH ### PREHOOK: query: select * from union_target_acid_nondirectinsert_flattened PREHOOK: type: QUERY PREHOOK: Input: default@union_target_acid_nondirectinsert_flattened