From 397992124a02da635fd42ec213bbb55e005eede0 Mon Sep 17 00:00:00 2001 From: xunxunmimi5577 <52647492+xunxunmimi5577@users.noreply.github.com> Date: Thu, 25 Dec 2025 14:43:53 +0800 Subject: [PATCH] Make filter estimation more accurate with histogram Make filter estimation more accurate with histogram --- .../plans/logical/statsEstimation/EstimationUtils.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala index 7083014f1f38..8254827884f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala @@ -243,10 +243,12 @@ object EstimationUtils { } else { // Computes the occupied portion of bins of the upperBound and lowerBound. val lowerBin = bins(lowerBinIndex) - val lowerPart = binHoldingRangePossibility(lowerBin.hi, lowerBound, lowerBin) + val lowerPart = if (lowerBin.hi == lowerBound && !lowerBoundInclusive) 0.0 + else binHoldingRangePossibility(lowerBin.hi, lowerBound, lowerBin) val higherBin = bins(upperBinIndex) - val higherPart = binHoldingRangePossibility(upperBound, higherBin.lo, higherBin) + val higherPart = if (higherBin.lo == upperBound && !upperBoundInclusive) 0.0 + else binHoldingRangePossibility(upperBound, higherBin.lo, higherBin) // The total number of bins is lowerPart + higherPart + bins between them lowerPart + higherPart + upperBinIndex - lowerBinIndex - 1