From f2dcdffeaee6ac938461391a98324a8d65fe0e09 Mon Sep 17 00:00:00 2001 From: Thomas Rebele Date: Thu, 12 Feb 2026 11:33:40 +0100 Subject: [PATCH] HIVE-29365: Range predicate within the same histogram bucket leads to an estimate rowcount=1 --- .../calcite/stats/FilterSelectivityEstimator.java | 6 +++++- .../calcite/stats/TestFilterSelectivityEstimator.java | 9 +++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java index b18c525c8849..818478604cde 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java @@ -492,7 +492,11 @@ public Double visitLiteral(RexLiteral literal) { private static double rangedSelectivity(KllFloatsSketch kll, float val1, float val2) { float[] splitPoints = new float[] { val1, val2 }; double[] boundaries = kll.getCDF(splitPoints, QuantileSearchCriteria.EXCLUSIVE); - return boundaries[1] - boundaries[0]; + // due to the way the KLL sketch is constructed, + // it is not possible to differentiate selectivity values below the error + // (e.g., if the error is 2%, a real selectivity of 1.5% might be estimated as 0% by KLL) + double normalizedRankError = kll.getNormalizedRankError(false); + return Math.max(boundaries[1] - boundaries[0], normalizedRankError); } /** diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java index 4255c756e078..9a6df23fc1e4 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestFilterSelectivityEstimator.java @@ -67,6 +67,7 @@ public class TestFilterSelectivityEstimator { private static final float[] VALUES = { 1, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 7 }; private static final KllFloatsSketch KLL = StatisticsTestUtils.createKll(VALUES); private static final float DELTA = Float.MIN_VALUE; + private static final double MIN_KLL_SELECTIVITY = 0.013294757464848584; private static final RexBuilder REX_BUILDER = new RexBuilder(new JavaTypeFactoryImpl(new HiveTypeSystemImpl())); private static final RelDataTypeFactory TYPE_FACTORY = REX_BUILDER.getTypeFactory(); private static RelOptCluster relOptCluster; @@ -240,12 +241,12 @@ public void testBetweenSelectivityLeftLowerThanMin() { @Test public void testBetweenSelectivityRightLowerThanMin() { - Assert.assertEquals(0, betweenSelectivity(KLL, -1, 0), DELTA); + Assert.assertEquals(MIN_KLL_SELECTIVITY, betweenSelectivity(KLL, -1, 0), DELTA); } @Test public void testBetweenSelectivityLeftHigherThanMax() { - Assert.assertEquals(0, betweenSelectivity(KLL, 10, 11), DELTA); + Assert.assertEquals(MIN_KLL_SELECTIVITY, betweenSelectivity(KLL, 10, 11), DELTA); } @Test @@ -399,7 +400,7 @@ public void testComputeRangePredicateSelectivityBetweenRightLowerThanMin() { doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0)); RexNode filter = REX_BUILDER.makeCall(HiveBetween.INSTANCE, boolFalse, inputRef0, intMinus1, int0); FilterSelectivityEstimator estimator = new FilterSelectivityEstimator(scan, mq); - Assert.assertEquals(0, estimator.estimateSelectivity(filter), DELTA); + Assert.assertEquals(MIN_KLL_SELECTIVITY, estimator.estimateSelectivity(filter), DELTA); } @Test @@ -407,7 +408,7 @@ public void testComputeRangePredicateSelectivityBetweenLeftHigherThanMax() { doReturn(Collections.singletonList(stats)).when(tableMock).getColStat(Collections.singletonList(0)); RexNode filter = REX_BUILDER.makeCall(HiveBetween.INSTANCE, boolFalse, inputRef0, int10, int11); FilterSelectivityEstimator estimator = new FilterSelectivityEstimator(scan, mq); - Assert.assertEquals(0, estimator.estimateSelectivity(filter), DELTA); + Assert.assertEquals(MIN_KLL_SELECTIVITY, estimator.estimateSelectivity(filter), DELTA); } @Test