Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tbl_ice
filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean)
filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((not (b) IN ('four', 'one')) and (a <> 22)) or (b) IN ('four', 'one') or (a = 22)) (type: boolean)
Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((not (b) IN ('four', 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,27 +150,27 @@ Stage-0
File Output Operator [FS_61]
Limit [LIM_60] (rows=20 width=447)
Number of rows:20
Select Operator [SEL_59] (rows=473 width=447)
Select Operator [SEL_59] (rows=784 width=447)
Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 1 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_58]
Top N Key Operator [TNK_57] (rows=473 width=447)
Top N Key Operator [TNK_57] (rows=784 width=447)
keys:_col0,top n:20
Map Join Operator [MAPJOIN_56] (rows=473 width=447)
Map Join Operator [MAPJOIN_56] (rows=784 width=447)
BucketMapJoin:true,Conds:SEL_55._col0, _col1=RS_53._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 3 [CUSTOM_EDGE] vectorized, llap
MULTICAST [RS_53]
PartitionCols:_col0, _col1
Select Operator [SEL_52] (rows=387 width=178)
Select Operator [SEL_52] (rows=497 width=178)
Output:["_col0","_col1"]
Filter Operator [FIL_51] (rows=387 width=178)
predicate:(((key < '0') or ((key > '0') and (key < '100')) or (key > '100')) and value is not null)
Filter Operator [FIL_51] (rows=497 width=178)
predicate:((not (key) IN ('0', '100')) and value is not null)
TableScan [TS_3] (rows=500 width=178)
default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
<-Select Operator [SEL_55] (rows=387 width=269)
<-Select Operator [SEL_55] (rows=497 width=269)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_54] (rows=387 width=269)
predicate:(((key1 < '0') or ((key1 > '0') and (key1 < '100')) or (key1 > '100')) and key2 is not null)
Filter Operator [FIL_54] (rows=497 width=269)
predicate:((not (key1) IN ('0', '100')) and key2 is not null)
TableScan [TS_0] (rows=500 width=269)
default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:8,Grouping Partition Columns:["key1","key2"],Output:["key1","key2","value"]

Expand Down Expand Up @@ -346,27 +346,27 @@ Stage-0
File Output Operator [FS_41]
Limit [LIM_40] (rows=20 width=447)
Number of rows:20
Select Operator [SEL_39] (rows=473 width=447)
Select Operator [SEL_39] (rows=784 width=447)
Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 1 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_38]
Top N Key Operator [TNK_37] (rows=473 width=447)
Top N Key Operator [TNK_37] (rows=784 width=447)
keys:_col0,top n:20
Map Join Operator [MAPJOIN_36] (rows=473 width=447)
Map Join Operator [MAPJOIN_36] (rows=784 width=447)
BucketMapJoin:true,Conds:SEL_35._col0=RS_33._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 3 [CUSTOM_EDGE] vectorized, llap
MULTICAST [RS_33]
PartitionCols:_col0
Select Operator [SEL_32] (rows=387 width=178)
Select Operator [SEL_32] (rows=497 width=178)
Output:["_col0","_col1"]
Filter Operator [FIL_31] (rows=387 width=178)
predicate:((key < '0') or (key > '100') or ((key > '0') and (key < '100')))
Filter Operator [FIL_31] (rows=497 width=178)
predicate:(not (key) IN ('0', '100'))
TableScan [TS_3] (rows=500 width=178)
default@src,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
<-Select Operator [SEL_35] (rows=387 width=269)
<-Select Operator [SEL_35] (rows=497 width=269)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_34] (rows=387 width=269)
predicate:((key1 < '0') or (key1 > '100') or ((key1 > '0') and (key1 < '100')))
Filter Operator [FIL_34] (rows=497 width=269)
predicate:(not (key1) IN ('0', '100'))
TableScan [TS_0] (rows=500 width=269)
default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:4,Grouping Partition Columns:["key1"],Output:["key1","key2","value"]

Expand Down Expand Up @@ -446,21 +446,21 @@ Stage-0
File Output Operator [FS_41]
Limit [LIM_40] (rows=20 width=447)
Number of rows:20
Select Operator [SEL_39] (rows=612 width=447)
Select Operator [SEL_39] (rows=786 width=447)
Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 2 [SIMPLE_EDGE] vectorized, llap
SHUFFLE [RS_38]
Top N Key Operator [TNK_37] (rows=612 width=447)
Top N Key Operator [TNK_37] (rows=786 width=447)
keys:_col0,top n:20
Map Join Operator [MAPJOIN_36] (rows=612 width=447)
Map Join Operator [MAPJOIN_36] (rows=786 width=447)
Conds:RS_33._col0=SEL_35._col0(Inner),Output:["_col0","_col1","_col2","_col3","_col4"]
<-Map 1 [BROADCAST_EDGE] vectorized, llap
BROADCAST [RS_33]
PartitionCols:_col0
Select Operator [SEL_32] (rows=387 width=269)
Select Operator [SEL_32] (rows=497 width=269)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_31] (rows=387 width=269)
predicate:(((key2 < 'val_0') or ((key2 > 'val_0') and (key2 < 'val_100')) or (key2 > 'val_100')) and key1 is not null)
Filter Operator [FIL_31] (rows=497 width=269)
predicate:((not (key2) IN ('val_0', 'val_100')) and key1 is not null)
TableScan [TS_0] (rows=500 width=269)
default@srcbucket_big,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key1","key2","value"]
<-Select Operator [SEL_35] (rows=500 width=178)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tbl_ice
filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean)
filterExpr: (((b) IN ('four', 'one') or (a = 22)) is null or ((not (b) IN ('four', 'one')) and (a <> 22))) (type: boolean)
Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL
Filter Operator
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((not (b) IN ('four', 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: PARTIAL
Select Operator
expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: tbl_ice
filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) (type: boolean)
filterExpr: ((a = 22) or (b) IN ('four', 'one') or ((b) IN ('four', 'one') or (a = 22)) is null or ((not (b) IN ('four', 'one')) and (a <> 22))) (type: boolean)
Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((a = 22) or (b) IN ('four', 'one')) (type: boolean)
Expand All @@ -93,7 +93,7 @@ STAGE PLANS:
Map-reduce partition columns: FILE__PATH (type: string)
Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or (((b < 'four') or ((b > 'four') and (b < 'one')) or (b > 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
predicate: ((((b) IN ('four', 'one') or (a = 22)) is null or ((not (b) IN ('four', 'one')) and (a <> 22))) and FILE__PATH is not null) (type: boolean)
Statistics: Num rows: 7 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: a (type: int), b (type: string), c (type: int), PARTITION__SPEC__ID (type: int), PARTITION__HASH (type: bigint), FILE__PATH (type: string), ROW__POSITION (type: bigint), PARTITION__PROJECTION (type: string)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
import org.apache.hadoop.hive.ql.session.SessionState;

import com.google.common.collect.Range;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Set;

/**
* A class that transforms a call to the internal {@link SqlStdOperatorTable#SEARCH} operator into an equivalent
Expand Down Expand Up @@ -76,26 +79,47 @@ public RexNode transform() {
PerfLogger perfLogger = SessionState.getPerfLogger();
perfLogger.perfLogBegin(this.getClass().getName(), PerfLogger.SEARCH_TRANSFORMER);

RangeConverter<C> consumer = new RangeConverter<>(rexBuilder, operandType, ref);
RangeSets.forEach(sarg.rangeSet, consumer);

List<RexNode> orList = new ArrayList<>();
if (sarg.nullAs == RexUnknownAs.TRUE && unknownContext != RexUnknownAs.TRUE) {
orList.add(rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref));
}
switch (consumer.inLiterals.size()) {
case 0:
break;
case 1:
orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0)));
break;
default:
List<RexNode> operands = new ArrayList<>(consumer.inLiterals.size() + 1);
operands.add(ref);
operands.addAll(consumer.inLiterals);
orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands));

if (sarg.isComplementedPoints()) {
Set<Range<C>> rangeSet = sarg.rangeSet.complement().asRanges();
if (rangeSet.size() == 1) {
// Generate ref <> value
Range<C> range = rangeSet.iterator().next();
RexNode notEq = rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, ref,
rexBuilder.makeLiteral(range.lowerEndpoint(), operandType, true, true));
orList.add(notEq);
} else {
// Generate NOT (ref IN (value1, value2,... valueN)); which is better for partition pruning and CNF distribution
List<RexNode> notInLiterals = rangeSet.stream().map(
range -> rexBuilder.makeLiteral(range.lowerEndpoint(), operandType, true, true))
.toList();
List<RexNode> operands = new ArrayList<>(rangeSet.size() + 1);
operands.add(ref);
operands.addAll(notInLiterals);
orList.add(rexBuilder.makeCall(SqlStdOperatorTable.NOT, rexBuilder.makeCall(HiveIn.INSTANCE, operands)));
}
} else {
RangeConverter<C> consumer = new RangeConverter<>(rexBuilder, operandType, ref);
RangeSets.forEach(sarg.rangeSet, consumer);

switch (consumer.inLiterals.size()) {
case 0:
break;
case 1:
orList.add(rexBuilder.makeCall(SqlStdOperatorTable.EQUALS, ref, consumer.inLiterals.get(0)));
break;
default:
List<RexNode> operands = new ArrayList<>(consumer.inLiterals.size() + 1);
operands.add(ref);
operands.addAll(consumer.inLiterals);
orList.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands));
}
orList.addAll(consumer.nodes);
}
orList.addAll(consumer.nodes);
RexNode x = RexUtil.composeDisjunction(rexBuilder, orList);

if (sarg.nullAs == RexUnknownAs.FALSE && unknownContext != RexUnknownAs.FALSE) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -628,14 +628,23 @@ private RexNode makeLiteral(C value) {
private double compute() {
final List<RexNode> inLiterals = new ArrayList<>();
final List<Double> rangeSelectivities = new ArrayList<>();
for (Range<C> range : sarg.rangeSet.asRanges()) {
if (!range.hasLowerBound() && !range.hasUpperBound()) {
return 1.0; // "all" range
final List<Double> searchSelectivities = new ArrayList<>();

if (sarg.isComplementedPoints()) {
// Generate 'ref <> value1 AND ... AND ref <> valueN'
List<RexNode> notEq = sarg.rangeSet.complement().asRanges().stream()
.map(range -> rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, ref, makeLiteral(range.lowerEndpoint())))
.toList();
searchSelectivities.add(RexUtil.composeConjunction(rexBuilder, notEq).accept(FilterSelectivityEstimator.this));
} else {
for (Range<C> range : sarg.rangeSet.asRanges()) {
if (!range.hasLowerBound() && !range.hasUpperBound()) {
return 1.0; // "all" range
}
processRangeSelectivity(range, rangeSelectivities, inLiterals);
}
processRangeSelectivity(range, rangeSelectivities, inLiterals);
}

final List<Double> searchSelectivities = new ArrayList<>();
if (!rangeSelectivities.isEmpty() && rangeSelectivities.stream().noneMatch(Objects::isNull)) {
// Aggregate all ranges selectivity, respecting the max value of 1
double total = Math.min(1.0, rangeSelectivities.stream().mapToDouble(Double::doubleValue).sum());
Expand All @@ -655,7 +664,8 @@ private double compute() {
List<RexNode> operands = new ArrayList<>(inLiterals.size() + 1);
operands.add(ref);
operands.addAll(inLiterals);
searchSelectivities.add(rexBuilder.makeCall(HiveIn.INSTANCE, operands).accept(FilterSelectivityEstimator.this));
searchSelectivities.add(
rexBuilder.makeCall(HiveIn.INSTANCE, operands).accept(FilterSelectivityEstimator.this));
}
}

Expand All @@ -664,7 +674,9 @@ private double compute() {
rexBuilder.makeCall(SqlStdOperatorTable.IS_NULL, ref).accept(FilterSelectivityEstimator.this));
}

return searchSelectivities.size() == 1 ? searchSelectivities.get(0) : computeDisjunctionSelectivity(searchSelectivities);
return searchSelectivities.size() == 1
? searchSelectivities.get(0)
: computeDisjunctionSelectivity(searchSelectivities);
}

private void processRangeSelectivity(Range<C> range, List<Double> rangeSelectivities, List<RexNode> inLiterals) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,17 @@ public void testBetweenSelectivityLeftEqualsRight_KO() {
betweenSelectivity(KLL, 2, 2);
}

/**
 * Estimates the selectivity of {@code col <> x AND col <> y} on a single input reference.
 *
 * <p>The conjunction is simplified first, and the test asserts that the result is a single
 * {@code SEARCH} call before handing it to {@link FilterSelectivityEstimator}, so the estimate
 * being checked is the one produced for the SEARCH form rather than for the raw conjunction.
 */
@Test
public void testComputeNotEqualsPredicateSelectivity() {
  RexNode firstNotEquals = REX_BUILDER.makeCall(SqlStdOperatorTable.NOT_EQUALS, inputRef0, int3);
  RexNode secondNotEquals = REX_BUILDER.makeCall(SqlStdOperatorTable.NOT_EQUALS, inputRef0, int7);
  RexNode conjunction = REX_BUILDER.makeCall(SqlStdOperatorTable.AND, firstNotEquals, secondNotEquals);

  RexNode simplified = simplify(conjunction);
  // Guard: the estimator path under test is only reached when simplification yields a SEARCH.
  Assert.assertEquals(SqlKind.SEARCH, simplified.getKind());

  FilterSelectivityEstimator selectivityEstimator = new FilterSelectivityEstimator(scan, mq);
  Assert.assertEquals(0.7346938775510203, selectivityEstimator.estimateSelectivity(simplified), DELTA);
}

@Test
public void testComputeRangePredicateSelectivityWhenNoStats() {
RexNode filter = REX_BUILDER.makeCall(SqlStdOperatorTable.LESS_THAN, inputRef0, int3);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: predicate_fold_tb
filterExpr: (value is null or (value < 3) or (value > 3)) (type: boolean)
filterExpr: ((value <> 3) or value is null) (type: boolean)
Filter Operator
predicate: (value is null or (value < 3) or (value > 3)) (type: boolean)
predicate: ((value <> 3) or value is null) (type: boolean)
Select Operator
expressions: value (type: int)
outputColumnNames: _col0
Expand Down
Loading
Loading