diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
index ea192e8af9aa..b1a93ffcaaf1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkMapJoinProc.java
@@ -187,7 +187,8 @@ public static Object processReduceSinkToHashJoin(ReduceSinkOperator parentRS, Ma
           ExprNodeDesc realCol = parentRS.getColumnExprMap().get(prefix + "." + keyCol);
           ColStatistics cs =
               StatsUtils.getColStatisticsFromExpression(context.conf, stats, realCol);
-          if (cs == null || cs.getCountDistint() <= 0) {
+          if (cs == null || cs.getCountDistint() < 0) {
+            // stats missing or NDV unknown (negative): the key count cannot be bounded
             maxKeyCount = Long.MAX_VALUE;
             break;
           }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetHashGroupByMinReduction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetHashGroupByMinReduction.java
index bbd474b842f8..06a1a5ba8849 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetHashGroupByMinReduction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SetHashGroupByMinReduction.java
@@ -69,8 +69,8 @@ public Object process(Node nd, Stack<Node> stack,
     Statistics parentStats = groupByOperator.getParentOperators().get(0).getStatistics();
     long ndvProduct = StatsUtils.computeNDVGroupingColumns(
         colStats, parentStats, true);
-    // if ndvProduct is 0 then column stats state must be partial and we are missing
-    if (ndvProduct == 0) {
+    if (ndvProduct < 0) {
+      // negative ndvProduct = NDV unknown (previously signalled by 0): bail out
       return null;
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
index f5431fa34934..aaa851b7942b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SortedDynPartitionOptimizer.java
@@ -90,6 +90,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
@@ -924,7 +925,8 @@ private long computeMaxWriters() {
      * Computes the partition cardinality based on column NDV statistics.
      * @return positive value = estimated cardinality, 0 = no partition columns, -1 = stats unavailable
      */
-    private long computePartCardinality(List<Integer> partitionPos,
+    @VisibleForTesting
+    long computePartCardinality(List<Integer> partitionPos,
         List<Function<List<ExprNodeDesc>, ExprNodeDesc>> customPartitionExprs,
         Statistics tStats, Operator<? extends OperatorDesc> fsParent,
         ArrayList<ExprNodeDesc> allRSCols) {
@@ -935,7 +937,8 @@ private long computePartCardinality(List<Integer> partitionPos,
         for (Integer idx : partitionPos) {
           ColumnInfo ci = fsParent.getSchema().getSignature().get(idx);
           ColStatistics partStats = tStats.getColumnStatisticsFromColName(ci.getInternalName());
-          if (partStats == null) {
+          // countDistinct < 0 means "unknown" - same path as missing stats
+          if (partStats == null || partStats.getCountDistint() < 0) {
             return -1;
           }
           partCardinality *= partStats.getCountDistint();
@@ -950,7 +953,8 @@ private long computePartCardinality(List<Integer> partitionPos,
           // implementations on UDFs (e.g. iceberg_bucket reports min(inputNDV, numBuckets))
           ColStatistics exprStats = StatsUtils.getColStatisticsFromExpression(
               this.parseCtx.getConf(), tStats, resolved);
-          if (exprStats == null) {
+          // countDistinct < 0 means "unknown" - same path as missing stats
+          if (exprStats == null || exprStats.getCountDistint() < 0) {
             return -1;
           }
           partCardinality *= exprStats.getCountDistint();
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
index b0f40d0d815e..eeb599e848dd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdDistinctRowCount.java
@@ -22,6 +22,7 @@
 import org.apache.calcite.rel.convert.Converter;
 import org.apache.calcite.rel.core.JoinRelType;
 import org.apache.calcite.rel.core.Spool;
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMdDistinctRowCount;
 import org.apache.calcite.rel.metadata.RelMdUtil;
@@ -50,7 +51,8 @@ public class HiveRelMdDistinctRowCount extends RelMdDistinctRowCount {
       ReflectiveRelMetadataProvider.reflectiveSource(
           BuiltInMethod.DISTINCT_ROW_COUNT.method, new HiveRelMdDistinctRowCount());
 
-  private HiveRelMdDistinctRowCount() {
+  @VisibleForTesting
+  HiveRelMdDistinctRowCount() {
   }
 
   public Double getDistinctRowCount(HiveTableScan htRel, RelMetadataQuery mq, ImmutableBitSet groupKey,
@@ -60,6 +62,9 @@ public Double getDistinctRowCount(HiveTableScan htRel, RelMetadataQuery mq, Immu
     List<ColStatistics> colStats = htRel.getColStat(projIndxLst);
     Double noDistinctRows = 1.0;
     for (ColStatistics cStat : colStats) {
+      if (cStat.getCountDistint() <= 0) {
+        return 0.0;
+      }
       noDistinctRows *= cStat.getCountDistint();
     }
 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
index abfe6170217e..28bc2623a6a6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
@@ -557,18 +557,25 @@ private long evaluateInExpr(Statistics stats, ExprNodeDesc pred, long currNumRow
         factor *= children.size() - 1;
       }
       for (int i = 0; i < columnStats.size(); i++) {
-        long dvs = columnStats.get(i) == null ? 0 : columnStats.get(i).getCountDistint();
+        ColStatistics cs = columnStats.get(i);
+        long dvs = cs == null ? -1L : cs.getCountDistint();
         if (dvs == 0) {
-          factor *= 0.5;
-          continue;
+          // verified zero distinct values: IN cannot match any row
+          factor = 0;
+          break;
         }
-        // (num of distinct vals for col in IN clause  / num of distinct vals for col )
-        double columnFactor = 1.0 / dvs;
-        if (!multiColumn) {
-          columnFactor *= estimateIntersectionSize(aspCtx.getConf(), columnStats.get(i), values.get(i));
+        if (dvs < 0) {
+          // missing stats or unknown NDV
+          factor *= 0.5;
+        } else {
+          // (num of distinct vals for col in IN clause  / num of distinct vals for col )
+          double columnFactor = 1.0 / dvs;
+          if (!multiColumn) {
+            columnFactor *= estimateIntersectionSize(aspCtx.getConf(), columnStats.get(i), values.get(i));
+          }
+          // max can be 1, even when ndv is larger in IN clause than in column stats
+          factor *= Math.min(columnFactor, 1.0);
         }
-        // max can be 1, even when ndv is larger in IN clause than in column stats
-        factor *= Math.min(columnFactor, 1.0);
       }
 
       // Clamp at 1 to be sure that we don't get out of range.
@@ -1317,9 +1324,7 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
 
               ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
               if (cs != null) {
-                long dvs = cs.getCountDistint();
-                numRows = dvs == 0 ? numRows / 2 : Math.round((double) numRows / dvs);
-                return numRows;
+                return rowsAfterEqualityFilter(numRows, cs.getCountDistint());
               }
             } else if (leaf instanceof ExprNodeColumnDesc) {
               ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) leaf;
@@ -1338,9 +1343,7 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
 
                 ColStatistics cs = stats.getColumnStatisticsFromColName(colName);
                 if (cs != null) {
-                  long dvs = cs.getCountDistint();
-                  numRows = dvs == 0 ? numRows / 2 : Math.round((double) numRows / dvs);
-                  return numRows;
+                  return rowsAfterEqualityFilter(numRows, cs.getCountDistint());
                 }
               }
             }
@@ -1380,6 +1383,16 @@ private long evaluateChildExpr(Statistics stats, ExprNodeDesc child,
       return numRows / 2;
     }
 
+    /**
+     * Rows kept by an equality filter: half if NDV is unknown (negative), none if NDV is 0, else rows / NDV.
+     */
+    private static long rowsAfterEqualityFilter(long numRows, long dvs) {
+      if (dvs < 0) {
+        return numRows / 2;
+      }
+      return dvs == 0 ? 0L : Math.round((double) numRows / dvs);
+    }
+
   }
 
   /**
@@ -1518,14 +1531,12 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         // compute product of distinct values of grouping columns
         long ndvProduct =
             StatsUtils.computeNDVGroupingColumns(colStats, parentStats, false);
-        // if ndvProduct is 0 then column stats state must be partial and we are missing
-        // column stats for a group by column
-        if (ndvProduct == 0) {
+        if (ndvProduct < 0) {
+          // unknown - missing column stats or unknown NDV on a grouping column
           ndvProduct = parentNumRows / 2;
 
           if (LOG.isDebugEnabled()) {
-            LOG.debug("STATS-" + gop.toString() + ": ndvProduct became 0 as some column does not" +
-                " have stats. ndvProduct changed to: " + ndvProduct);
+            LOG.debug("STATS-{}: ndvProduct unknown; falling back to {}", gop, ndvProduct);
           }
         }
         final long maxColumnNDV = colStats.stream()
@@ -1720,6 +1731,10 @@ static void computeAggregateColumnMinMax(ColStatistics cs, HiveConf conf, Aggreg
           long valuesCount = agg.getDistinct() ?
               parentCS.getCountDistint() :
               parentStats.getNumRows() - numNulls;
+          // countDistinct < 0 would produce a Range with a negative maxValue
+          if (agg.getDistinct() && valuesCount < 0) {
+            return;
+          }
           Range range = parentCS.getRange();
           // Get the aggregate function matching the name in the query.
           GenericUDAFResolver udaf =
@@ -1819,9 +1834,24 @@ private boolean checkMapSideAggregation(GroupByOperator gop,
 
         // estimate size of key from column statistics
         long avgKeySize = 0;
+        // lazily computed on first unknown NDV (null = not yet looked up)
+        Long parentNumRows = null;
         for (ColStatistics cs : colStats) {
           if (cs != null) {
-            numEstimatedRows = StatsUtils.safeMult(numEstimatedRows, cs.getCountDistint());
+            long ndv = cs.getCountDistint();
+            if (ndv < 0) {
+              if (parentNumRows == null) {
+                // unknown NDV: fall back to parentNumRows / 2, matching the heuristic
+                // used elsewhere in this file when GROUP BY cardinality cannot be computed
+                Statistics parentStats = gop.getParentOperators().get(0).getStatistics();
+                parentNumRows = (parentStats != null) ? parentStats.getNumRows() : -1L;
+              }
+              if (parentNumRows <= 0) {
+                return false;
+              }
+              ndv = parentNumRows / 2;
+            }
+            numEstimatedRows = StatsUtils.safeMult(numEstimatedRows, ndv);
             avgKeySize += Math.ceil(cs.getAvgColLen());
           }
         }
@@ -2227,7 +2257,8 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       return null;
     }
 
-    private long calculateUnmatchedRowsForOuter(HiveConf conf, long inputRowCount,
+    @VisibleForTesting
+    long calculateUnmatchedRowsForOuter(HiveConf conf, long inputRowCount,
         List<String> joinKeys, Statistics statistics, long distinctUnmatched) {
       // Extract the ndv from each of the columns involved in the join
       List<Long> distinctVals = new ArrayList<>();
@@ -2248,14 +2279,15 @@ private long calculateUnmatchedRowsForOuter(HiveConf conf, long inputRowCount,
           distinctVal = StatsUtils.addWithExpDecay(distinctVals);
         }
       }
-      // If we have a greater number of unmatched values than number of distinct values,
-      // we just return the number of rows in the input as we can assume there are no
-      // matches
-      if (distinctUnmatched >= distinctVal) {
+      // distinctVal <= 0 covers unknown (<0) and verified-zero (==0) cases; the latter means
+      // no key value matches anything, so every input row is unmatched in an outer join.
+      // distinctUnmatched < 0 (unknown) is treated conservatively the same way.
+      // If unmatched >= distinctVal, all rows can be assumed unmatched.
+      if (distinctVal <= 0 || distinctUnmatched < 0 || distinctUnmatched >= distinctVal) {
         return inputRowCount;
       }
       // Otherwise, divide the number of input rows by the number of distinct values
-      // and divide by the number of distinct values unmatched
+      // and multiply by the number of distinct values unmatched
       return StatsUtils.safeMult(inputRowCount / distinctVal, distinctUnmatched);
     }
 
@@ -2604,7 +2636,8 @@ void updateNumNulls(ColStatistics colStats, long leftUnmatchedRows, long rightUn
       colStats.setNumNulls(newNumNulls);
     }
 
-    private void updateColStats(HiveConf conf, Statistics stats, long leftUnmatchedRows, long rightUnmatchedRows,
+    @VisibleForTesting
+    void updateColStats(HiveConf conf, Statistics stats, long leftUnmatchedRows, long rightUnmatchedRows,
         long newNumRows, CommonJoinOperator<? extends JoinDesc> jop, Map<Integer, Long> rowCountParents) {
 
       if (newNumRows < 0) {
@@ -2632,26 +2665,29 @@ private void updateColStats(HiveConf conf, Statistics stats, long leftUnmatchedR
         int pos = jop.getConf().getReversedExprs().get(cs.getColumnName());
         long oldDV = cs.getCountDistint();
 
-        boolean useCalciteForNdvReadjustment
-            = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_JOIN_NDV_READJUSTMENT);
-        long newDV = oldDV;
-        if (useCalciteForNdvReadjustment) {
-          Double approxNdv = RelMdUtil.numDistinctVals(oldDV * 1.0, newNumRows * 1.0);
-          Preconditions.checkNotNull(approxNdv, "approximate NDV is null");
-          newDV = approxNdv.longValue();
-        } else {
-          long oldRowCount = rowCountParents.get(pos);
-          double ratio = (double) newNumRows / (double) oldRowCount;
-
-          // if ratio is greater than 1, then number of rows increases. This can happen
-          // when some operators like GROUPBY duplicates the input rows in which case
-          // number of distincts should not change. Update the distinct count only when
-          // the output number of rows is less than input number of rows.
-          if (ratio <= 1.0) {
-            newDV = (long) Math.ceil(ratio * oldDV);
+        // countDistinct < 0 means "unknown"
+        if (oldDV >= 0) {
+          boolean useCalciteForNdvReadjustment
+              = HiveConf.getBoolVar(conf, ConfVars.HIVE_STATS_JOIN_NDV_READJUSTMENT);
+          long newDV = oldDV;
+          if (useCalciteForNdvReadjustment) {
+            Double approxNdv = RelMdUtil.numDistinctVals(oldDV * 1.0, newNumRows * 1.0);
+            Preconditions.checkNotNull(approxNdv, "approximate NDV is null");
+            newDV = approxNdv.longValue();
+          } else {
+            long oldRowCount = rowCountParents.get(pos);
+            double ratio = (double) newNumRows / (double) oldRowCount;
+
+            // if ratio is greater than 1, then number of rows increases. This can happen
+            // when some operators like GROUPBY duplicates the input rows in which case
+            // number of distincts should not change. Update the distinct count only when
+            // the output number of rows is less than input number of rows.
+            if (ratio < 1.0) {
+              newDV = (long) Math.ceil(ratio * oldDV);
+            }
           }
+          cs.setCountDistint(newDV);
         }
-        cs.setCountDistint(newDV);
         updateNumNulls(cs, leftUnmatchedRows, rightUnmatchedRows, newNumRows, pos, jop);
       }
       stats.setColumnStats(colStats);
@@ -2718,7 +2754,8 @@ private long computeFinalRowCount(List<Long> rowCountParents, long interimRowCou
       return result;
     }
 
-    private long computeRowCountAssumingInnerJoin(List<Long> rowCountParents, long denom,
+    @VisibleForTesting
+    long computeRowCountAssumingInnerJoin(List<Long> rowCountParents, long denom,
         CommonJoinOperator<? extends JoinDesc> join) {
       double factor = 0.0d;
       long result = 1;
@@ -2734,7 +2771,12 @@ private long computeRowCountAssumingInnerJoin(List<Long> rowCountParents, long d
         }
       }
 
-      denom = denom == 0 ? 1 : denom;
+      // denom < 0 (unknown) and denom == 0 (verified-zero join key, cardinality formula
+      // degenerate) both fall back to "no constraint" rather than producing a negative
+      // factor or div-by-zero
+      if (denom <= 0) {
+        denom = 1;
+      }
       factor = (double) max / (double) denom;
 
       for (int i = 0; i < rowCountParents.size(); i++) {
@@ -2786,6 +2828,9 @@ private long getDenominatorForUnmatchedRows(List<Long> distinctVals) {
       if (distinctVals.isEmpty()) {
         return 2;
       }
+      if (StatsUtils.containsUnknownNDV(distinctVals)) {
+        return -1L;
+      }
 
       // simple join from 2 relations: denom = min(v1, v2)
       if (distinctVals.size() <= 2) {
@@ -2826,6 +2871,9 @@ private long getDenominator(List<Long> distinctVals) {
         // denominator is 2.
         return 2;
       }
+      if (StatsUtils.containsUnknownNDV(distinctVals)) {
+        return -1L;
+      }
 
       // simple join from 2 relations: denom = max(v1, v2)
       if (distinctVals.size() <= 2) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index d672b7acfc22..ab048a94dc29 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -260,7 +260,11 @@ public void addToColumnStats(List<ColStatistics> colStats) {
         } else {
           existing.setNumNulls(StatsUtils.safeAdd(existing.getNumNulls(), cs.getNumNulls()));
         }
-        existing.setCountDistint(Math.max(existing.getCountDistint(), cs.getCountDistint()));
+        if (cs.getCountDistint() < 0 || existing.getCountDistint() < 0) {
+          existing.setCountDistint(-1);
+        } else {
+          existing.setCountDistint(Math.max(existing.getCountDistint(), cs.getCountDistint()));
+        }
       }
     }
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 55f9d0c1e158..cba53f5df4ee 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -813,7 +813,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col
     } else if (colTypeLowerCase.equals(serdeConstants.STRING_TYPE_NAME)
         || colTypeLowerCase.startsWith(serdeConstants.CHAR_TYPE_NAME)
         || colTypeLowerCase.startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
-      cs.setCountDistint(csd.getStringStats().getNumDVs());
+      cs.setCountDistint(csd.getStringStats().isSetNumDVs() ? csd.getStringStats().getNumDVs() : -1);
       cs.setNumNulls(csd.getStringStats().getNumNulls());
       cs.setAvgColLen(csd.getStringStats().getAvgColLen());
       cs.setBitVectors(csd.getStringStats().getBitVectors());
@@ -837,9 +837,12 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col
     } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) {
       cs.setAvgColLen(csd.getBinaryStats().getAvgColLen());
       cs.setNumNulls(csd.getBinaryStats().getNumNulls());
+      // BinaryColumnStatsData has no numDVs field - the metastore does not track NDV
+      // for binary columns, so it is genuinely unknown
+      cs.setCountDistint(-1);
     } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
-      cs.setCountDistint(csd.getTimestampStats().getNumDVs());
+      cs.setCountDistint(csd.getTimestampStats().isSetNumDVs() ? csd.getTimestampStats().getNumDVs() : -1);
       cs.setNumNulls(csd.getTimestampStats().getNumNulls());
       Long lowVal = (csd.getTimestampStats().getLowValue() != null) ? csd.getTimestampStats().getLowValue()
           .getSecondsSinceEpoch() : null;
@@ -852,7 +855,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col
       cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp());
     } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal());
-      cs.setCountDistint(csd.getDecimalStats().getNumDVs());
+      cs.setCountDistint(csd.getDecimalStats().isSetNumDVs() ? csd.getDecimalStats().getNumDVs() : -1);
       cs.setNumNulls(csd.getDecimalStats().getNumNulls());
       Decimal highValue = csd.getDecimalStats().getHighValue();
       Decimal lowValue = csd.getDecimalStats().getLowValue();
@@ -871,7 +874,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String col
       cs.setHistogram(csd.getDecimalStats().getHistogram());
     } else if (colTypeLowerCase.equals(serdeConstants.DATE_TYPE_NAME)) {
       cs.setAvgColLen(JavaDataModel.get().lengthOfDate());
-      cs.setCountDistint(csd.getDateStats().getNumDVs());
+      cs.setCountDistint(csd.getDateStats().isSetNumDVs() ? csd.getDateStats().getNumDVs() : -1);
       cs.setNumNulls(csd.getDateStats().getNumNulls());
       Long lowVal = (csd.getDateStats().getLowValue() != null) ? csd.getDateStats().getLowValue()
           .getDaysSinceEpoch() : null;
@@ -900,7 +903,7 @@ public static void fillColumnStatisticsData(ColumnStatisticsData data, ColStatis
 
   private static void fillColStatisticsFromLongStatsData(ColStatistics cs, LongColumnStatsData longStats,
       double avgColLen) {
-    cs.setCountDistint(longStats.getNumDVs());
+    cs.setCountDistint(longStats.isSetNumDVs() ? longStats.getNumDVs() : -1);
     cs.setNumNulls(longStats.getNumNulls());
     cs.setAvgColLen(avgColLen);
     Long lowVal = longStats.isSetLowValue() ? longStats.getLowValue() : null;
@@ -912,7 +915,7 @@ private static void fillColStatisticsFromLongStatsData(ColStatistics cs, LongCol
 
   private static void fillColStatisticsFromDoubleStatsData(ColStatistics cs, DoubleColumnStatsData doubleStats,
       double avgColLen) {
-    cs.setCountDistint(doubleStats.getNumDVs());
+    cs.setCountDistint(doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : -1);
     cs.setNumNulls(doubleStats.getNumNulls());
     cs.setAvgColLen(avgColLen);
     Double lowVal = doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null;
@@ -1690,6 +1693,9 @@ public static Long addWithExpDecay (List<Long> distinctVals) {
     // Exponential back-off for NDVs.
     // 1) Descending order sort of NDVs
     // 2) denominator = NDV1 * (NDV2 ^ (1/2)) * (NDV3 ^ (1/4))) * ....
+    if (containsUnknownNDV(distinctVals)) {
+      return -1L;
+    }
     distinctVals.sort(Collections.reverseOrder());
 
     long denom = distinctVals.get(0);
@@ -1716,6 +1722,10 @@ private static long getNDVFor(ExprNodeGenericFuncDesc engfd, long numRows, Stati
       for (String col : engfd.getCols()) {
         ColStatistics stats = parentStats.getColumnStatisticsFromColName(col);
         if (stats != null) {
+          // countDistinct < 0 means "unknown"
+          if (stats.getCountDistint() < 0) {
+            return -1L;
+          }
           ndvs.add(stats.getCountDistint());
         }
       }
@@ -2036,20 +2046,23 @@ public static void updateStats(Statistics stats, long newNumRows,
       for (ColStatistics cs : colStats) {
         long oldDV = cs.getCountDistint();
         if (affectedColumns.contains(cs.getColumnName())) {
-          long newDV = oldDV;
-
-          // if ratio is greater than 1, then number of rows increases. This can happen
-          // when some operators like GROUPBY duplicates the input rows in which case
-          // number of distincts should not change. Update the distinct count only when
-          // the output number of rows is less than input number of rows.
-          if (ratio <= 1.0) {
-            newDV = (long) Math.ceil(ratio * oldDV);
-          }
-          cs.setCountDistint(newDV);
           cs.setFilterColumn();
-          oldDV = newDV;
+          // countDistinct < 0 means "unknown" - skip the NDV math
+          if (oldDV >= 0) {
+            long newDV = oldDV;
+
+            // if ratio is greater than 1, then number of rows increases. This can happen
+            // when some operators like GROUPBY duplicates the input rows in which case
+            // number of distincts should not change. Update the distinct count only when
+            // the output number of rows is less than input number of rows.
+            if (ratio <= 1.0) {
+              newDV = (long) Math.ceil(ratio * oldDV);
+            }
+            cs.setCountDistint(newDV);
+            oldDV = newDV;
+          }
         }
-        if (oldDV > newNumRows) {
+        if (oldDV >= 0 && oldDV > newNumRows) {
           cs.setCountDistint(newNumRows);
         }
         // numNulls < 0 means "unknown" - preserve the sentinel value
@@ -2080,7 +2093,8 @@ public static void scaleColStatistics(List<ColStatistics> colStats, double facto
       if (cs.getNumNulls() >= 0) {
         cs.setNumNulls(StatsUtils.safeMult(cs.getNumNulls(), factor));
       }
-      if (factor < 1.0) {
+      // countDistinct < 0 means "unknown" - preserve the sentinel value
+      if (factor < 1.0 && cs.getCountDistint() >= 0) {
         final double newNDV = Math.ceil(cs.getCountDistint() * factor);
         cs.setCountDistint(newNDV > Long.MAX_VALUE ? Long.MAX_VALUE : (long) newNDV);
       }
@@ -2092,7 +2106,8 @@ public static long computeNDVGroupingColumns(List<ColStatistics> colStats, Stati
     List<Long> ndvValues =
         extractNDVGroupingColumns(colStats, parentStats);
     if (ndvValues == null) {
-      return 0L;
+      // unknown: a grouping column has NDV<0 or stats are missing on a partial state
+      return -1L;
     }
     if (ndvValues.isEmpty()) {
       // No grouping columns, one row
@@ -2112,6 +2127,11 @@ private static List<Long> extractNDVGroupingColumns(List<ColStatistics> colStats
     for (ColStatistics cs : colStats) {
       if (cs != null) {
         long ndv = cs.getCountDistint();
+        // countDistinct < 0 means "unknown" - signal it like a missing entry
+        if (ndv < 0) {
+          ndvValues = null;
+          break;
+        }
         if (cs.getNumNulls() > 0) {
           ndv = StatsUtils.safeAdd(ndv, 1);
         }
@@ -2134,4 +2154,13 @@ private static List<Long> extractNDVGroupingColumns(List<ColStatistics> colStats
 
     return ndvValues;
   }
+
+  /**
+   * Checks whether {@code distinctVals} holds a negative NDV, the "unknown" sentinel of
+   * HIVE-29438 / HIVE-29625; aggregators use this to propagate "unknown" to their result.
+   */
+  public static boolean containsUnknownNDV(List<Long> distinctVals) {
+    boolean allKnown = distinctVals.stream().allMatch(ndv -> ndv >= 0);
+    return !allKnown;
+  }
 }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java
index 4de2867de7c0..2a3e16c48235 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/PessimisticStatCombiner.java
@@ -41,7 +41,9 @@ public void add(ColStatistics stat) {
     if (stat.getAvgColLen() > result.getAvgColLen()) {
       result.setAvgColLen(stat.getAvgColLen());
     }
-    if (stat.getCountDistint() > result.getCountDistint()) {
+    if (stat.getCountDistint() < 0 || result.getCountDistint() < 0) {
+      result.setCountDistint(-1);
+    } else if (stat.getCountDistint() > result.getCountDistint()) {
       result.setCountDistint(stat.getCountDistint());
     }
     if (stat.getNumNulls() < 0 || result.getNumNulls() < 0) {
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestReduceSinkMapJoinProc.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestReduceSinkMapJoinProc.java
new file mode 100644
index 000000000000..58329a228027
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestReduceSinkMapJoinProc.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.when;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.stream.Stream;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.parse.GenTezProcContext;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.BaseWork;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.mockito.MockedStatic;
+
+/**
+ * Verifies the per-parent key count that ReduceSinkMapJoinProc.processReduceSinkToHashJoin
+ * stores in MapJoinDesc.getParentKeyCounts() for different NDV values of the join key column.
+ */
+class TestReduceSinkMapJoinProc {
+
+  // A null ndv row represents StatsUtils.getColStatisticsFromExpression returning null.
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("keyCountFromNdvCases")
+  void testProcessReduceSinkToHashJoinKeyCountFromNdv(
+      String scenarioName, Long ndv, long parentRows, long expectedKeyCount) throws Exception {
+    invokeAndAssertKeyCount(ndv == null ? null : buildColStat(ndv), parentRows, expectedKeyCount);
+  }
+
+  private static Stream<Arguments> keyCountFromNdvCases() {
+    // Behavior recap:
+    //   Initial keyCount = parentRows = stats.getNumRows()
+    //   For each key col:
+    //     if cs == null or cs.getCountDistint() < 0 -> maxKeyCount = MAX_VALUE; break    <-- HIVE-29625 change
+    //     else maxKeyCount *= cs.getCountDistint()
+    //   keyCount = min(maxKeyCount, keyCount)
+    //   if keyCount == 0 -> keyCount = 1
+    //   joinConf.getParentKeyCounts().put(pos, keyCount)   [only if keyCount != MAX_VALUE]
+    return Stream.of(
+        // NDV > 0 and below parentRows -> keyCount = NDV
+        Arguments.of("knownPositiveBelowParent",       10L,    1000L, 10L),
+        // NDV > 0 but above parentRows -> capped at parentRows
+        Arguments.of("knownPositiveAboveParent",       5000L,  1000L, 1000L),
+        // NDV = 0 (verified zero under HIVE-29625) -> maxKeyCount = 0 -> keyCount = 0 -> bumped to 1
+        Arguments.of("verifiedZeroBumpsToOne",         0L,     1000L, 1L),
+        // NDV = -1 (unknown under HIVE-29625) -> maxKeyCount = MAX_VALUE -> keyCount = parentRows
+        Arguments.of("unknownFallsBackToParent",       -1L,    1000L, 1000L),
+        // cs == null (no derivable stat) -> shares the MAX_VALUE fallback path
+        Arguments.of("nullColStatsFallsBackToParent",  null,   1000L, 1000L)
+    );
+  }
+
+  // Shared harness: build GenTezProcContext + mocked operators, run the method, read the keyCount put().
+  private static void invokeAndAssertKeyCount(
+      ColStatistics csForKey, long parentRows, long expectedKeyCount) throws Exception {
+    // ---- Operator chain mocks ----
+    ReduceSinkOperator parentRS = mock(ReduceSinkOperator.class);
+    MapJoinOperator mapJoinOp = mock(MapJoinOperator.class);
+    ReduceSinkDesc rsConf = mock(ReduceSinkDesc.class);
+    MapJoinDesc joinConf = mock(MapJoinDesc.class);
+    Statistics rsStats = mock(Statistics.class);
+    ExprNodeDesc keyExpr = mock(ExprNodeDesc.class);
+    BaseWork parentWork = mock(BaseWork.class);
+
+    when(parentRS.getConf()).thenReturn(rsConf);
+    when(parentRS.getStatistics()).thenReturn(rsStats);
+    when(parentRS.getCompilationOpContext()).thenReturn(new CompilationOpContext());
+    Map<String, ExprNodeDesc> columnExprMap = new HashMap<>();
+    columnExprMap.put(Utilities.ReduceField.KEY.toString() + ".k0", keyExpr);
+    when(parentRS.getColumnExprMap()).thenReturn(columnExprMap);
+    Operator<?> upstreamParent = mock(Operator.class);
+    when(upstreamParent.getSchema()).thenReturn(new RowSchema(Collections.emptyList()));
+    when(parentRS.getParentOperators()).thenReturn(Arrays.asList(upstreamParent));
+    List<Operator<? extends OperatorDesc>> childOps = new ArrayList<>();
+    childOps.add(mapJoinOp);
+    when(parentRS.getChildOperators()).thenReturn(childOps);
+
+    when(mapJoinOp.getConf()).thenReturn(joinConf);
+    when(rsConf.getOutputKeyColumnNames()).thenReturn(Arrays.asList("k0"));
+    when(joinConf.isBucketMapJoin()).thenReturn(false);
+    when(joinConf.isDynamicPartitionHashJoin()).thenReturn(false);
+    Map<Integer, Long> parentKeyCounts = new LinkedHashMap<>();
+    when(joinConf.getParentKeyCounts()).thenReturn(parentKeyCounts);
+    when(joinConf.getParentToInput()).thenReturn(new LinkedHashMap<>());
+    when(joinConf.getParentDataSizes()).thenReturn(new LinkedHashMap<>());
+    Map<Byte, List<ExprNodeDesc>> keyExprMap = new HashMap<>();
+    keyExprMap.put((byte) 0, Collections.emptyList());
+    when(joinConf.getKeys()).thenReturn(keyExprMap);
+
+    when(rsStats.getNumRows()).thenReturn(parentRows);
+    when(rsStats.getDataSize()).thenReturn(8000L);
+    when(parentWork.getName()).thenReturn("parent_work");
+
+    // ---- Real GenTezProcContext (constructor sets up all the maps for us) ----
+    HiveConf conf = new HiveConf();
+    ParseContext parseCtx = mock(ParseContext.class);
+    Context ctx = mock(Context.class);
+    when(parseCtx.getContext()).thenReturn(ctx);
+    when(ctx.getSequencer()).thenReturn(new AtomicInteger());
+    GenTezProcContext context = new GenTezProcContext(
+        conf, parseCtx, Collections.emptyList(), new ArrayList<>(),
+        Collections.emptySet(), Collections.emptySet());
+
+    context.childToWorkMap.put(parentRS, Arrays.asList(parentWork));
+    context.mapJoinParentMap.put(mapJoinOp, Arrays.asList(parentRS));
+
+    // ---- Stub StatsUtils.getColStatisticsFromExpression to return our chosen colStat ----
+    try (MockedStatic<StatsUtils> stub = mockStatic(StatsUtils.class)) {
+      stub.when(() -> StatsUtils.getColStatisticsFromExpression(
+              any(HiveConf.class), any(Statistics.class), any(ExprNodeDesc.class)))
+          .thenReturn(csForKey);
+
+      ReduceSinkMapJoinProc.processReduceSinkToHashJoin(parentRS, mapJoinOp, context);
+    }
+
+    // Compare boxed values: a missing entry then fails the assertion instead of throwing an NPE.
+    assertEquals(Long.valueOf(expectedKeyCount), parentKeyCounts.get(0));
+  }
+
+  private static ColStatistics buildColStat(long ndv) {
+    ColStatistics cs = new ColStatistics("k0", "int");
+    cs.setCountDistint(ndv);
+    return cs;
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSetHashGroupByMinReduction.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSetHashGroupByMinReduction.java
new file mode 100644
index 000000000000..6c7c2145382b
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSetHashGroupByMinReduction.java
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyFloat;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.atLeastOnce;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.function.Consumer;
+import java.util.stream.Stream;
+
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc.Mode;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.plan.Statistics.State;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.mockito.MockedStatic;
+
+class TestSetHashGroupByMinReduction {
+
+  // Thresholds stubbed on the mocked GroupByDesc in every scenario. With ndvProduct=100 and
+  // numRows=1000 the known-positive case is meant to produce a factor (0.9) strictly below
+  // MIN_REDUCTION (0.99), which is what triggers setMinReductionHashAggr.
+  private static final float MIN_REDUCTION = 0.99f;
+  private static final float MIN_REDUCTION_LOWER_BOUND = 0.1f;
+
+  // Stubs the NDV product and checks whether process() rewrote the min-reduction threshold.
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("ndvProductCases")
+  void testProcessByNdvProduct(String name, long ndvProduct, boolean expectSetCall)
+      throws SemanticException {
+    GroupByOperator groupBy = setupCompleteHashGroupBy();
+    GroupByDesc groupByConf = groupBy.getConf();
+    try (MockedStatic<StatsUtils> statsUtils = mockStatic(StatsUtils.class)) {
+      statsUtils.when(() -> StatsUtils.computeNDVGroupingColumns(any(), any(), eq(true)))
+          .thenReturn(ndvProduct);
+      Object outcome = new SetHashGroupByMinReduction().process(groupBy, null, null);
+      assertNull(outcome, "process() always returns null sentinel");
+      if (expectSetCall) {
+        verify(groupByConf, atLeastOnce()).setMinReductionHashAggr(anyFloat());
+      } else {
+        verify(groupByConf, never()).setMinReductionHashAggr(anyFloat());
+      }
+    }
+  }
+
+  // Columns: scenario name, stubbed ndvProduct, whether setMinReductionHashAggr is expected.
+  private static Stream<Arguments> ndvProductCases() {
+    return Stream.of(
+        Arguments.of("unknownNDVEarlyReturns", -1L, false),
+        Arguments.of("verifiedZeroFactorTooHigh", 0L, false),
+        Arguments.of("knownPositiveBelowDefault", 100L, true));
+  }
+
+  // Flips one precondition on the mocked operator; the threshold must then stay untouched.
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("earlyReturnGateCases")
+  void testProcessEarlyReturnsOnUnsupportedState(String name, Consumer<GroupByOperator> flipGate)
+      throws SemanticException {
+    GroupByOperator groupBy = setupCompleteHashGroupBy();
+    flipGate.accept(groupBy);
+    Object outcome = new SetHashGroupByMinReduction().process(groupBy, null, null);
+    assertNull(outcome);
+    verify(groupBy.getConf(), never()).setMinReductionHashAggr(anyFloat());
+  }
+
+  // Each case re-stubs a single getter with a value the test expects process() to bail out on.
+  private static Stream<Arguments> earlyReturnGateCases() {
+    Consumer<GroupByOperator> notHashMode =
+        gby -> when(gby.getConf().getMode()).thenReturn(Mode.MERGEPARTIAL);
+    Consumer<GroupByOperator> partialBasicStats =
+        gby -> when(gby.getStatistics().getBasicStatsState()).thenReturn(State.PARTIAL);
+    Consumer<GroupByOperator> partialColumnStats =
+        gby -> when(gby.getStatistics().getColumnStatsState()).thenReturn(State.PARTIAL);
+    return Stream.of(
+        Arguments.of("modeNotHash", notHashMode),
+        Arguments.of("basicStatsIncomplete", partialBasicStats),
+        Arguments.of("columnStatsIncomplete", partialColumnStats));
+  }
+
+  // Builds a mocked HASH-mode group-by with COMPLETE stats and no keys, intended to get past
+  // every early-return gate; with no keys the colStats loop has nothing to visit.
+  private static GroupByOperator setupCompleteHashGroupBy() {
+    GroupByDesc groupByConf = mock(GroupByDesc.class);
+    when(groupByConf.getMode()).thenReturn(Mode.HASH);
+    when(groupByConf.getKeys()).thenReturn(Collections.emptyList());
+    when(groupByConf.getMinReductionHashAggr()).thenReturn(MIN_REDUCTION);
+    when(groupByConf.getMinReductionHashAggrLowerBound()).thenReturn(MIN_REDUCTION_LOWER_BOUND);
+
+    Statistics ownStats = mock(Statistics.class);
+    when(ownStats.getBasicStatsState()).thenReturn(State.COMPLETE);
+    when(ownStats.getColumnStatsState()).thenReturn(State.COMPLETE);
+
+    Statistics inputStats = mock(Statistics.class);
+    when(inputStats.getNumRows()).thenReturn(1000L);
+    Operator<?> input = mock(Operator.class);
+    when(input.getStatistics()).thenReturn(inputStats);
+
+    RowSchema rowSchema = mock(RowSchema.class);
+    when(rowSchema.getSignature()).thenReturn(Collections.emptyList());
+
+    GroupByOperator groupBy = mock(GroupByOperator.class);
+    when(groupBy.getConf()).thenReturn(groupByConf);
+    when(groupBy.getStatistics()).thenReturn(ownStats);
+    when(groupBy.getSchema()).thenReturn(rowSchema);
+    when(groupBy.getParentOperators()).thenReturn(Arrays.asList(input));
+    return groupBy;
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSortedDynPartitionOptimizer.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSortedDynPartitionOptimizer.java
new file mode 100644
index 000000000000..3fbd063619e3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestSortedDynPartitionOptimizer.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.when;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.function.Function;
+import java.util.stream.Stream;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.Statistics;
+import org.apache.hadoop.hive.ql.stats.StatsUtils;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.mockito.MockedStatic;
+
+class TestSortedDynPartitionOptimizer {
+
+  // Position-based partition columns: NDVs are served through the stubbed getColumnStatisticsFromColName.
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("computePartCardinalityColumnCases")
+  void testComputePartCardinalityColumnBranch(
+      String scenarioName, long[] ndvs, boolean firstStatNull, long expected) {
+    ColStatistics[] perColumn = buildColStats(ndvs, firstStatNull, "p");
+    Statistics tableStats = mock(Statistics.class);
+    List<ColumnInfo> signature = new ArrayList<>();
+    List<Integer> positions = new ArrayList<>();
+    for (int i = 0; i < ndvs.length; i++) {
+      String internalName = "p" + i;
+      ColumnInfo column = mock(ColumnInfo.class);
+      when(column.getInternalName()).thenReturn(internalName);
+      when(tableStats.getColumnStatisticsFromColName(internalName)).thenReturn(perColumn[i]);
+      signature.add(column);
+      positions.add(i);
+    }
+
+    RowSchema rowSchema = mock(RowSchema.class);
+    when(rowSchema.getSignature()).thenReturn(signature);
+    Operator<?> sinkParent = mock(FileSinkOperator.class);
+    when(sinkParent.getSchema()).thenReturn(rowSchema);
+
+    SortedDynPartitionOptimizer.SortedDynamicPartitionProc proc = newProc(null);
+    long cardinality = proc.computePartCardinality(
+        positions, Collections.emptyList(), tableStats, sinkParent, new ArrayList<>());
+
+    assertEquals(expected, cardinality, scenarioName);
+  }
+
+  // Columns: scenario name, per-column NDVs, whether the first stat is missing, expected result.
+  private static Stream<Arguments> computePartCardinalityColumnCases() {
+    return Stream.of(
+        // Every NDV known and positive -> plain product
+        Arguments.of("twoPositiveColumns", new long[] {10L, 5L}, false, 50L),
+        Arguments.of("singlePositiveColumn", new long[] {42L}, false, 42L),
+        Arguments.of("threeColumnsCompound", new long[] {3L, 4L, 5L}, false, 60L),
+        // HIVE-29625: a negative NDV means unknown -> -1
+        Arguments.of("unknownNDVShortCircuits", new long[] {10L, -1L, 5L}, false, -1L),
+        Arguments.of("firstUnknownShortCircuits", new long[] {-1L, 10L}, false, -1L),
+        Arguments.of("singleUnknownColumn", new long[] {-1L}, false, -1L),
+        // NDV 0 is a verified zero (HIVE-29625 disambiguation) and still takes part in the product
+        Arguments.of("verifiedZeroProducesZero", new long[] {10L, 0L, 5L}, false, 0L),
+        Arguments.of("singleVerifiedZero", new long[] {0L}, false, 0L),
+        // No stats object for the first column (partStats == null) -> -1
+        Arguments.of("nullStatsShortCircuits", new long[] {0L, 10L}, true, -1L));
+  }
+
+  // Custom partition expressions: NDVs are served by the stubbed StatsUtils.getColStatisticsFromExpression.
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("computePartCardinalityExprCases")
+  void testComputePartCardinalityCustomExprBranch(
+      String scenarioName, long[] ndvs, boolean firstStatNull, long expected) {
+    HiveConf hiveConf = new HiveConf();
+    ParseContext parseContext = mock(ParseContext.class);
+    when(parseContext.getConf()).thenReturn(hiveConf);
+    SortedDynPartitionOptimizer.SortedDynamicPartitionProc proc = newProc(parseContext);
+
+    Statistics tableStats = mock(Statistics.class);
+    Operator<?> sinkParent = mock(FileSinkOperator.class);
+    ArrayList<ExprNodeDesc> allRSCols = new ArrayList<>();
+    List<ExprNodeDesc> targets = new ArrayList<>();
+    List<Function<List<ExprNodeDesc>, ExprNodeDesc>> partitionExprs = new ArrayList<>();
+    for (int i = 0; i < ndvs.length; i++) {
+      ExprNodeDesc target = mock(ExprNodeDesc.class);
+      targets.add(target);
+      partitionExprs.add(ignoredCols -> target);
+    }
+
+    ColStatistics[] perExpr = buildColStats(ndvs, firstStatNull, "e");
+    try (MockedStatic<StatsUtils> statsUtils = mockStatic(StatsUtils.class)) {
+      for (int i = 0; i < ndvs.length; i++) {
+        ExprNodeDesc target = targets.get(i);
+        ColStatistics stat = perExpr[i];
+        statsUtils.when(
+            () -> StatsUtils.getColStatisticsFromExpression(eq(hiveConf), eq(tableStats), eq(target)))
+            .thenReturn(stat);
+      }
+
+      long cardinality = proc.computePartCardinality(
+          Collections.emptyList(), partitionExprs, tableStats, sinkParent, allRSCols);
+
+      assertEquals(expected, cardinality, scenarioName);
+    }
+  }
+
+  private static Stream<Arguments> computePartCardinalityExprCases() {
+    return Stream.of(
+        Arguments.of("singleKnownExpr", new long[] {7L}, false, 7L),
+        Arguments.of("twoKnownExprsMultiply", new long[] {3L, 4L}, false, 12L),
+        // HIVE-29625: a negative NDV in the expression stats short-circuits to -1
+        Arguments.of("unknownExprStatsShortCircuit", new long[] {5L, -1L}, false, -1L),
+        Arguments.of("firstExprUnknown", new long[] {-1L, 5L}, false, -1L),
+        // A verified zero from expression stats still takes part in the product
+        Arguments.of("verifiedZeroExprProducesZero", new long[] {5L, 0L}, false, 0L),
+        // StatsUtils hands back null for the first expression -> -1
+        Arguments.of("nullExprStatsShortCircuits", new long[] {0L, 5L}, true, -1L));
+  }
+
+  @Test
+  void testComputePartCardinalityBothEmptyReturnsZero() {
+    long cardinality = newProc(null).computePartCardinality(
+        Collections.emptyList(), Collections.emptyList(),
+        mock(Statistics.class), mock(FileSinkOperator.class), new ArrayList<>());
+    assertEquals(0L, cardinality, "Both partitionPos and customPartitionExprs empty -> 0");
+  }
+
+  // SortedDynamicPartitionProc is an inner class, so it is created through an outer instance.
+  private static SortedDynPartitionOptimizer.SortedDynamicPartitionProc newProc(ParseContext parseCtx) {
+    return new SortedDynPartitionOptimizer().new SortedDynamicPartitionProc(parseCtx);
+  }
+
+  // Builds one ColStatistics per NDV, named prefix + index. When firstStatNull is set, slot 0
+  // is left null to simulate a missing stat for either branch.
+  private static ColStatistics[] buildColStats(long[] ndvs, boolean firstStatNull, String prefix) {
+    ColStatistics[] built = new ColStatistics[ndvs.length];
+    for (int i = 0; i < ndvs.length; i++) {
+      if (firstStatNull && i == 0) {
+        continue;
+      }
+      built[i] = new ColStatistics(prefix + i, "int");
+      built[i].setCountDistint(ndvs[i]);
+    }
+    return built;
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestHiveRelMdDistinctRowCount.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestHiveRelMdDistinctRowCount.java
new file mode 100644
index 000000000000..c49b6db33b28
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/stats/TestHiveRelMdDistinctRowCount.java
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.calcite.stats;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+
+import org.apache.calcite.rel.metadata.RelMetadataQuery;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+class TestHiveRelMdDistinctRowCount {
+
+  // Runs getDistinctRowCount against a mocked HiveTableScan whose column stats carry the given NDVs.
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("getDistinctRowCountCases")
+  void testGetDistinctRowCountHiveTableScan(
+      String scenarioName, long[] ndvs, double rowCount, double expected) {
+    RelMetadataQuery metadataQuery = mock(RelMetadataQuery.class);
+    HiveTableScan scan = mock(HiveTableScan.class);
+    when(metadataQuery.getRowCount(scan)).thenReturn(rowCount);
+    when(scan.getColStat(any())).thenReturn(buildColStats(ndvs));
+
+    Double distinctRows = new HiveRelMdDistinctRowCount()
+        .getDistinctRowCount(scan, metadataQuery, ImmutableBitSet.of(0), null);
+
+    assertEquals(expected, distinctRows);
+  }
+
+  private static Stream<Arguments> getDistinctRowCountCases() {
+    return Stream.of(
+        // Only positive NDVs whose product stays below the row count
+        Arguments.of("allPositiveProductUnderRowCount", new long[] {10L, 5L}, 1000.0, 50.0),
+        Arguments.of("singlePositive", new long[] {42L}, 1000.0, 42.0),
+        // A product above the row count is capped at the row count
+        Arguments.of("productCappedAtRowCount", new long[] {2000L, 50L}, 1000.0, 1000.0),
+        Arguments.of("singlePositiveExceedsRowCount", new long[] {5000L}, 1000.0, 1000.0),
+        // A verified-zero NDV in any column hits the <=0 early exit
+        Arguments.of("verifiedZeroInAnyColumn", new long[] {10L, 0L, 5L}, 1000.0, 0.0),
+        Arguments.of("verifiedZeroFirstShortCircuits", new long[] {0L, 10L}, 1000.0, 0.0),
+        Arguments.of("verifiedZeroAlone", new long[] {0L}, 1000.0, 0.0),
+        // An unknown NDV (-1) in any column hits the same <=0 early exit (HIVE-29625)
+        Arguments.of("unknownInAnyColumn", new long[] {10L, -1L, 5L}, 1000.0, 0.0),
+        Arguments.of("unknownFirstShortCircuits", new long[] {-1L, 10L}, 1000.0, 0.0),
+        Arguments.of("unknownAlone", new long[] {-1L}, 1000.0, 0.0),
+        // Verified zero mixed with unknown: 0.0 whichever comes first
+        Arguments.of("unknownThenVerifiedZero", new long[] {-1L, 0L}, 1000.0, 0.0),
+        Arguments.of("verifiedZeroThenUnknown", new long[] {0L, -1L}, 1000.0, 0.0),
+        // No column stats at all: the loop is skipped and Math.min(1.0, rowCount) is returned
+        Arguments.of("emptyColStatsFallsThroughTo1", new long[] {}, 1000.0, 1.0),
+        Arguments.of("emptyColStatsCappedByLowRowCount", new long[] {}, 0.5, 0.5));
+  }
+
+  private static List<ColStatistics> buildColStats(long[] ndvs) {
+    List<ColStatistics> built = new ArrayList<>();
+    int position = 0;
+    for (long ndv : ndvs) {
+      ColStatistics colStat = new ColStatistics("c" + position++, "int");
+      colStat.setCountDistint(ndv);
+      built.add(colStat);
+    }
+    return built;
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java
index 4d9d351af8f1..6e646a6ad502 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/stats/annotation/TestStatsRulesProcFactory.java
@@ -23,14 +23,25 @@
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.Timestamp;
 import org.apache.hadoop.hive.metastore.StatisticsTestUtils;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.tez.DagUtils;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
 import org.apache.hadoop.hive.ql.plan.Statistics;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
@@ -39,23 +50,37 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.junit.Test;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.mockito.ArgumentCaptor;
+import org.mockito.MockedStatic;
 
 import java.util.Arrays;
 import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.stream.Stream;
 
 import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
 import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
 import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.mockStatic;
+import static org.mockito.Mockito.spy;
+import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
 import static org.apache.hadoop.hive.ql.optimizer.stats.annotation.StatsRulesProcFactory.FilterStatsRule.extractFloatFromLiteralValue;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 
-public class TestStatsRulesProcFactory {
+class TestStatsRulesProcFactory {
 
   private final static String COL_NAME = "col1";
   private final static ExprNodeDesc COL_EXPR = new  ExprNodeColumnDesc(
@@ -66,7 +91,7 @@ public class TestStatsRulesProcFactory {
   private final static long[] VALUES = { 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 4L, 5L, 6L, 7L };
 
   @Test
-  public void testComparisonRowCountZeroNonNullValues() throws SemanticException {
+  void testComparisonRowCountZeroNonNullValues() throws SemanticException {
     long numNulls = 2;
     long[] values = {};
     Statistics stats = createStatistics(values, numNulls);
@@ -80,7 +105,7 @@ public void testComparisonRowCountZeroNonNullValues() throws SemanticException {
   }
 
   @Test
-  public void testComparisonRowCountInvalidKll() throws SemanticException {
+  void testComparisonRowCountInvalidKll() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
     stats.getColumnStats().get(0).setHistogram(null);
@@ -102,253 +127,304 @@ public void testComparisonRowCountInvalidKll() throws SemanticException {
     assertEquals((VALUES.length + numNulls) / 3, numRows);
   }
 
-  @Test
-  public void testComparisonRowCountLessThan() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(3)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
+  /**
+   * HIVE-29625: IN-filter row-count estimate by NDV of the column.
+   *   unknown NDV (-1) -> factor 0.5 applied once for the column, currNumRows = round(rows * 0.5)
+   *   verified-zero NDV (0) -> factor = 0, no rows match
+   */
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("evaluateInExprCases")
+  void testEvaluateInExprByNDV(String name, long ndvOverride, long expectedRows)
+      throws SemanticException {
+    Statistics stats = createStatistics(VALUES, 0);
+    stats.getColumnStats().get(0).setCountDistint(ndvOverride);
 
-    assertEquals(8, numRows);
-  }
+    AnnotateStatsProcCtx ctx = spy(new AnnotateStatsProcCtx(null));
+    when(ctx.getConf()).thenReturn(new HiveConf());
 
-  @Test
-  public void testComparisonRowCountLessThanMin() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
+    ExprNodeDesc inExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+        new GenericUDFIn(),
+        Arrays.asList(COL_EXPR, createExprNodeConstantDesc(1), createExprNodeConstantDesc(2)));
 
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(1)));
     long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
+        stats, inExpr, ctx, Arrays.asList(COL_NAME), null, VALUES.length);
 
-    assertEquals(0, numRows);
+    assertEquals(expectedRows, numRows);
   }
 
-  @Test
-  public void testComparisonRowCountLessThanBelowMin() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(0)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
-
-    assertEquals(0, numRows);
+  private static Stream<Arguments> evaluateInExprCases() {
+    return Stream.of(
+        Arguments.of("unknownNDVAppliesHalfFactor", -1L, Math.round(VALUES.length * 0.5)),
+        Arguments.of("verifiedZeroReturnsZero",      0L, 0L)
+    );
   }
 
-  @Test
-  public void testComparisonRowCountLessThanMax() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
+  /**
+   * HIVE-29625: col = const row-count estimate by NDV of the column.
+   *   unknown NDV (-1) -> numRows/2 (13/2 = 6)
+   *   verified-zero NDV (0) -> 0 rows match
+   *   known NDV (n) -> uniform distribution numRows/n (13/7 ~= 1.86, rounded to 2)
+   */
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("evaluateEqualCases")
+  void testEvaluateEqualByNDV(String name, long ndvOverride, long expectedRows)
+      throws SemanticException {
+    Statistics stats = createStatistics(VALUES, 0);
+    stats.getColumnStats().get(0).setCountDistint(ndvOverride);
 
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(7)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
+    AnnotateStatsProcCtx ctx = spy(new AnnotateStatsProcCtx(null));
+    when(ctx.getConf()).thenReturn(new HiveConf());
 
-    assertEquals(12, numRows);
-  }
+    ExprNodeDesc eqExpr = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+        new GenericUDFOPEqual(),
+        Arrays.asList(COL_EXPR, createExprNodeConstantDesc(1)));
 
-  @Test
-  public void testComparisonRowCountLessThanAboveMax() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(8)));
     long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
+        stats, eqExpr, ctx, Arrays.asList(COL_NAME), null, VALUES.length);
 
-    assertEquals(13, numRows);
+    assertEquals(expectedRows, numRows);
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrLessThan() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(3)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
-
-    assertEquals(9, numRows);
+  private static Stream<Arguments> evaluateEqualCases() {
+    return Stream.of(
+        Arguments.of("unknownNDVUsesHalfRows",      -1L, 6L),
+        Arguments.of("verifiedZeroReturnsZero",      0L, 0L),
+        Arguments.of("knownNDVUsesUniformDistribution", 7L, 2L)
+    );
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrLessThanMin() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(1)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
-
-    assertEquals(1, numRows);
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("groupByFinalCases")
+  void testGroupByStatsRuleFinalCardinality(String name, long keyNdv, long expectedRows) throws SemanticException {
+    assertGroupByFinalCardinality(keyNdv, expectedRows);
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrLessThanBelowMin() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(0)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
-
-    assertEquals(0, numRows);
+  private static Stream<Arguments> groupByFinalCases() {
+    return Stream.of(
+        Arguments.of("ndvUnknownAppliesFallback",            -1L, 500L),
+        Arguments.of("ndvVerifiedZeroFlowsThroughClampedToOne", 0L,  1L),
+        Arguments.of("ndvKnownUsesProduct",                  10L,  10L)
+    );
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrLessThanMax() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(7)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("groupByHashCases")
+  void testCheckMapSideAggregationHashCardinality(String name, long keyNdv, long expectedRows)
+      throws SemanticException {
+    assertGroupByHashCardinality(keyNdv, expectedRows);
+  }
 
-    assertEquals(13, numRows);
+  private static Stream<Arguments> groupByHashCases() {
+    return Stream.of(
+        Arguments.of("ndvUnknownFallsBackToHalfParent",  -1L, 500L),
+        Arguments.of("ndvKnownUsesProduct",             100L, 100L)
+    );
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrLessThanAboveMax() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(8)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
+  private void assertGroupByHashCardinality(long keyNdv, long expectedRows) throws SemanticException {
+    Statistics parentStats = new Statistics(1000, 8000, 0, 0);
+    parentStats.setBasicStatsState(Statistics.State.COMPLETE);
+    parentStats.setColumnStatsState(Statistics.State.COMPLETE);
+    ColStatistics keyCol = new ColStatistics("k", "int");
+    keyCol.setCountDistint(keyNdv);
+    keyCol.setNumNulls(0);
+    parentStats.setColumnStats(Collections.singletonList(keyCol));
+
+    @SuppressWarnings("unchecked")
+    Operator<? extends OperatorDesc> parent = mock(Operator.class);
+    when(parent.getStatistics()).thenReturn(parentStats);
+    when(parent.getParentOperators()).thenReturn(Collections.emptyList());
+
+    GroupByDesc gbyDesc = mock(GroupByDesc.class);
+    when(gbyDesc.getMode()).thenReturn(GroupByDesc.Mode.HASH);
+    when(gbyDesc.getAggregators()).thenReturn(Collections.emptyList());
+    when(gbyDesc.isGroupingSetsPresent()).thenReturn(false);
+    ExprNodeColumnDesc keyExpr = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "k", "table", false);
+    when(gbyDesc.getKeys()).thenReturn(Collections.singletonList(keyExpr));
+
+    GroupByOperator gop = mock(GroupByOperator.class);
+    when(gop.getParentOperators()).thenReturn(Collections.singletonList(parent));
+    when(gop.getConf()).thenReturn(gbyDesc);
+    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
+    colExprMap.put("_col0", keyExpr);
+    when(gop.getColumnExprMap()).thenReturn(colExprMap);
+    RowSchema rs = mock(RowSchema.class);
+    ColumnInfo colInfo = new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "table", false);
+    when(rs.getSignature()).thenReturn(Collections.singletonList(colInfo));
+    when(rs.getColumnInfo("_col0")).thenReturn(colInfo);
+    when(gop.getSchema()).thenReturn(rs);
+
+    Context context = mock(Context.class);
+    HiveConf conf = new HiveConf();
+    conf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_REEXECUTION_ENABLED, false);
+    when(context.getConf()).thenReturn(conf);
+    ParseContext pctx = mock(ParseContext.class);
+    when(pctx.getContext()).thenReturn(context);
+    AnnotateStatsProcCtx ctx = spy(new AnnotateStatsProcCtx(null));
+    when(ctx.getConf()).thenReturn(conf);
+    when(ctx.getParseContext()).thenReturn(pctx);
+
+    // checkMapSideAggregation calls DagUtils.getContainerResource(conf) to compute
+    // the available hash-aggregation memory. Stub it to a generous 1024 MB so the
+    // estimated hash table size stays well under the threshold and hashAgg is selected.
+    try (MockedStatic<DagUtils> dagMock = mockStatic(DagUtils.class)) {
+      Resource res = mock(Resource.class);
+      when(res.getMemorySize()).thenReturn(1024L);
+      dagMock.when(() -> DagUtils.getContainerResource(any())).thenReturn(res);
+
+      new StatsRulesProcFactory.GroupByStatsRule().process(gop, null, ctx, (Object[]) null);
+    }
 
-    assertEquals(13, numRows);
+    ArgumentCaptor<Statistics> captor = ArgumentCaptor.forClass(Statistics.class);
+    verify(gop).setStatistics(captor.capture());
+    assertEquals(expectedRows, captor.getValue().getNumRows());
   }
 
-  @Test
-  public void testComparisonRowCountGreaterThan() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(5)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
+  private void assertGroupByFinalCardinality(long keyNdv, long expectedRows) throws SemanticException {
+    Statistics parentStats = new Statistics(1000, 8000, 0, 0);
+    parentStats.setBasicStatsState(Statistics.State.COMPLETE);
+    parentStats.setColumnStatsState(Statistics.State.COMPLETE);
+    ColStatistics keyCol = new ColStatistics("k", "int");
+    keyCol.setCountDistint(keyNdv);
+    keyCol.setNumNulls(0);
+    parentStats.setColumnStats(Collections.singletonList(keyCol));
 
-    assertEquals(2, numRows);
-  }
+    @SuppressWarnings("unchecked")
+    Operator<? extends OperatorDesc> parent = mock(Operator.class);
+    when(parent.getStatistics()).thenReturn(parentStats);
+    when(parent.getParentOperators()).thenReturn(Collections.emptyList());
+
+    GroupByDesc gbyDesc = mock(GroupByDesc.class);
+    when(gbyDesc.getMode()).thenReturn(GroupByDesc.Mode.FINAL);
+    when(gbyDesc.getAggregators()).thenReturn(Collections.emptyList());
+    when(gbyDesc.isGroupingSetsPresent()).thenReturn(false);
+    ExprNodeColumnDesc keyExpr = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "k", "table", false);
+    when(gbyDesc.getKeys()).thenReturn(Collections.singletonList(keyExpr));
+
+    GroupByOperator gop = mock(GroupByOperator.class);
+    when(gop.getParentOperators()).thenReturn(Collections.singletonList(parent));
+    when(gop.getConf()).thenReturn(gbyDesc);
+    Map<String, ExprNodeDesc> colExprMap = new HashMap<>();
+    colExprMap.put("_col0", keyExpr);
+    when(gop.getColumnExprMap()).thenReturn(colExprMap);
+    RowSchema rs = mock(RowSchema.class);
+    ColumnInfo colInfo = new ColumnInfo("_col0", TypeInfoFactory.intTypeInfo, "table", false);
+    when(rs.getSignature()).thenReturn(Collections.singletonList(colInfo));
+    when(rs.getColumnInfo("_col0")).thenReturn(colInfo);
+    when(gop.getSchema()).thenReturn(rs);
+
+    Context context = mock(Context.class);
+    HiveConf conf = new HiveConf();
+    conf.setBoolVar(HiveConf.ConfVars.HIVE_QUERY_REEXECUTION_ENABLED, false);
+    when(context.getConf()).thenReturn(conf);
+    ParseContext pctx = mock(ParseContext.class);
+    when(pctx.getContext()).thenReturn(context);
+    AnnotateStatsProcCtx ctx = spy(new AnnotateStatsProcCtx(null));
+    when(ctx.getConf()).thenReturn(conf);
+    when(ctx.getParseContext()).thenReturn(pctx);
 
-  @Test
-  public void testComparisonRowCountGreaterThanMin() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(1)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
+    new StatsRulesProcFactory.GroupByStatsRule().process(gop, null, ctx, (Object[]) null);
 
-    assertEquals(12, numRows);
+    ArgumentCaptor<Statistics> captor = ArgumentCaptor.forClass(Statistics.class);
+    verify(gop).setStatistics(captor.capture());
+    assertEquals(expectedRows, captor.getValue().getNumRows());
   }
 
-  @Test
-  public void testComparisonRowCountGreaterThanBelowMin() throws SemanticException {
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("comparisonRowCountLessThanCases")
+  void testComparisonRowCountLessThan(String name, int constant, long expected) throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
+
     ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(0)));
+        new GenericUDFOPLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(constant)));
     long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
         stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
 
-    assertEquals(13, numRows);
+    assertEquals(expected, numRows);
   }
 
-  @Test
-  public void testComparisonRowCountGreaterThanMax() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(7)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
-
-    assertEquals(0, numRows);
+  private static Stream<Arguments> comparisonRowCountLessThanCases() {
+    return Stream.of(
+        Arguments.of("midRange",      3, 8L),
+        Arguments.of("equalToMin",    1, 0L),
+        Arguments.of("belowMin",      0, 0L),
+        Arguments.of("equalToMax",    7, 12L),
+        Arguments.of("aboveMax",      8, 13L)
+    );
   }
 
-  @Test
-  public void testComparisonRowCountGreaterThanAboveMax() throws SemanticException {
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("comparisonRowCountEqualOrLessThanCases")
+  void testComparisonRowCountEqualOrLessThan(String name, int constant, long expected) throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
     ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(8)));
+        new GenericUDFOPEqualOrLessThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(constant)));
     long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
         stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
 
-    assertEquals(0, numRows);
+    assertEquals(expected, numRows);
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrGreaterThan() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(5)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
-
-    assertEquals(3, numRows);
+  private static Stream<Arguments> comparisonRowCountEqualOrLessThanCases() {
+    return Stream.of(
+        Arguments.of("midRange",      3, 9L),
+        Arguments.of("equalToMin",    1, 1L),
+        Arguments.of("belowMin",      0, 0L),
+        Arguments.of("equalToMax",    7, 13L),
+        Arguments.of("aboveMax",      8, 13L)
+    );
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrGreaterThanMin() throws SemanticException {
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("comparisonRowCountGreaterThanCases")
+  void testComparisonRowCountGreaterThan(String name, int constant, long expected) throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
     ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(1)));
+        new GenericUDFOPGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(constant)));
     long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
         stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
 
-    assertEquals(13, numRows);
+    assertEquals(expected, numRows);
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrGreaterThanBelowMin() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(0)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
-
-    assertEquals(13, numRows);
+  private static Stream<Arguments> comparisonRowCountGreaterThanCases() {
+    return Stream.of(
+        Arguments.of("midRange",      5, 2L),
+        Arguments.of("equalToMin",    1, 12L),
+        Arguments.of("belowMin",      0, 13L),
+        Arguments.of("equalToMax",    7, 0L),
+        Arguments.of("aboveMax",      8, 0L)
+    );
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrGreaterThanMax() throws SemanticException {
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("comparisonRowCountEqualOrGreaterThanCases")
+  void testComparisonRowCountEqualOrGreaterThan(String name, int constant, long expected) throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
     ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(7)));
+        new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(constant)));
     long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
         stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
 
-    assertEquals(1, numRows);
+    assertEquals(expected, numRows);
   }
 
-  @Test
-  public void testComparisonRowCountEqualOrGreaterThanBeyondMax() throws SemanticException {
-    long numNulls = 2;
-    Statistics stats = createStatistics(VALUES, numNulls);
-    ExprNodeDesc exprNodeDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.intTypeInfo,
-        new GenericUDFOPEqualOrGreaterThan(), Arrays.asList(COL_EXPR, createExprNodeConstantDesc(8)));
-    long numRows = new StatsRulesProcFactory.FilterStatsRule().evaluateExpression(
-        stats, exprNodeDesc, STATS_PROC_CTX, Collections.emptyList(), null, VALUES.length + numNulls);
-
-    assertEquals(0, numRows);
+  private static Stream<Arguments> comparisonRowCountEqualOrGreaterThanCases() {
+    return Stream.of(
+        Arguments.of("midRange",      5, 3L),
+        Arguments.of("equalToMin",    1, 13L),
+        Arguments.of("belowMin",      0, 13L),
+        Arguments.of("equalToMax",    7, 1L),
+        Arguments.of("aboveMax",      8, 0L)
+    );
   }
 
   @Test
-  public void testComparisonRowCountEqualOrLessThanWhenMinEqualMax() throws SemanticException {
+  void testComparisonRowCountEqualOrLessThanWhenMinEqualMax() throws SemanticException {
     long[] values = { 1L, 1L };
     long numNulls = 2;
     Statistics stats = createStatistics(values, numNulls);
@@ -362,7 +438,7 @@ public void testComparisonRowCountEqualOrLessThanWhenMinEqualMax() throws Semant
   }
 
   @Test
-  public void testComparisonRowCountEqualOrGreaterThanWhenMinEqualMax() throws SemanticException {
+  void testComparisonRowCountEqualOrGreaterThanWhenMinEqualMax() throws SemanticException {
     long[] values = { 1L, 1L };
     long numNulls = 2;
     Statistics stats = createStatistics(values, numNulls);
@@ -376,7 +452,7 @@ public void testComparisonRowCountEqualOrGreaterThanWhenMinEqualMax() throws Sem
   }
 
   @Test
-  public void testBetween() throws SemanticException {
+  void testBetween() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -390,7 +466,7 @@ public void testBetween() throws SemanticException {
   }
 
   @Test
-  public void testLiteralExtraction() {
+  void testLiteralExtraction() {
     final double DELTA = 1e-5;
 
     assertEquals((float) 100,
@@ -420,7 +496,7 @@ public void testLiteralExtraction() {
   }
 
   @Test
-  public void testLiteralExtractionFailures() {
+  void testLiteralExtractionFailures() {
     // make sure the correct exceptions are raised so that we can default to standard computation
     String[] types = {"int", "tinyint", "smallint", "bigint", "date", "timestamp", "float", "double"};
     for (String type : types) {
@@ -437,7 +513,7 @@ public void testLiteralExtractionFailures() {
   }
 
   @Test
-  public void testBetweenLeftLowerThanMin() throws SemanticException {
+  void testBetweenLeftLowerThanMin() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -457,7 +533,7 @@ public void testBetweenLeftLowerThanMin() throws SemanticException {
   }
 
   @Test
-  public void testBetweenLeftLowerThanMinRightHigherThanMax() throws SemanticException {
+  void testBetweenLeftLowerThanMinRightHigherThanMax() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -471,7 +547,7 @@ public void testBetweenLeftLowerThanMinRightHigherThanMax() throws SemanticExcep
   }
 
   @Test
-  public void testBetweenRightHigherThanMax() throws SemanticException {
+  void testBetweenRightHigherThanMax() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -491,7 +567,7 @@ public void testBetweenRightHigherThanMax() throws SemanticException {
   }
 
   @Test
-  public void testBetweenRightLowerThanMin() throws SemanticException {
+  void testBetweenRightLowerThanMin() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -505,7 +581,7 @@ public void testBetweenRightLowerThanMin() throws SemanticException {
   }
 
   @Test
-  public void testBetweenLeftHigherThanMax() throws SemanticException {
+  void testBetweenLeftHigherThanMax() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -519,7 +595,7 @@ public void testBetweenLeftHigherThanMax() throws SemanticException {
   }
 
   @Test
-  public void testBetweenLeftEqualMax() throws SemanticException {
+  void testBetweenLeftEqualMax() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -533,7 +609,7 @@ public void testBetweenLeftEqualMax() throws SemanticException {
   }
 
   @Test
-  public void testNotBetween() throws SemanticException {
+  void testNotBetween() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -558,7 +634,7 @@ public void testNotBetween() throws SemanticException {
   }
 
   @Test
-  public void testNotBetweenLowerThanMinHigherThanMax() throws SemanticException {
+  void testNotBetweenLowerThanMinHigherThanMax() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -572,7 +648,7 @@ public void testNotBetweenLowerThanMinHigherThanMax() throws SemanticException {
   }
 
   @Test
-  public void testNotBetweenLeftEqualsRight() throws SemanticException {
+  void testNotBetweenLeftEqualsRight() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -586,7 +662,7 @@ public void testNotBetweenLeftEqualsRight() throws SemanticException {
   }
 
   @Test
-  public void testNotBetweenRightLowerThanLeft() throws SemanticException {
+  void testNotBetweenRightLowerThanLeft() throws SemanticException {
     long numNulls = 2;
     Statistics stats = createStatistics(VALUES, numNulls);
 
@@ -648,7 +724,7 @@ private static ColStatistics createColStatistics(
    * Without the fix, valuesCount = numRows - (-1) = numRows + 1 (wrong).
    */
   @Test
-  public void testComputeAggregateColumnMinMaxWithUnknownNumNulls() throws SemanticException {
+  void testComputeAggregateColumnMinMaxWithUnknownNumNulls() throws SemanticException {
     ColStatistics cs = new ColStatistics("_col0", "bigint");
     HiveConf conf = new HiveConf();
 
@@ -676,14 +752,14 @@ public void testComputeAggregateColumnMinMaxWithUnknownNumNulls() throws Semanti
     // Verify: With the fix, COUNT Range should be (0, 100)
     // numNulls=-1 is treated as 0, so valuesCount = 100 - 0 = 100
     // Without the fix, valuesCount = 100 - (-1) = 101 (WRONG)
-    assertNotNull("Range should be set on COUNT column", cs.getRange());
-    assertEquals("COUNT min should be 0", 0L, ((Number) cs.getRange().minValue).longValue());
-    assertEquals("COUNT max should be 100 (numRows), not 101",
-        100L, ((Number) cs.getRange().maxValue).longValue());
+    assertNotNull(cs.getRange(), "Range should be set on COUNT column");
+    assertEquals(0L, ((Number) cs.getRange().minValue).longValue(), "COUNT min should be 0");
+    assertEquals(100L, ((Number) cs.getRange().maxValue).longValue(),
+        "COUNT max should be 100 (numRows), not 101");
   }
 
   @Test
-  public void testComputeAggregateColumnMinMaxWithKnownNumNulls() throws SemanticException {
+  void testComputeAggregateColumnMinMaxWithKnownNumNulls() throws SemanticException {
     ColStatistics cs = new ColStatistics("_col0", "bigint");
     HiveConf conf = new HiveConf();
 
@@ -708,10 +784,58 @@ public void testComputeAggregateColumnMinMaxWithKnownNumNulls() throws SemanticE
         cs, conf, agg, "bigint", parentStats);
 
     // With known numNulls=20, valuesCount = 100 - 20 = 80
-    assertNotNull("Range should be set", cs.getRange());
+    assertNotNull(cs.getRange(), "Range should be set");
     assertEquals(0L, ((Number) cs.getRange().minValue).longValue());
-    assertEquals("COUNT max should be 80 (numRows - numNulls)",
-        80L, ((Number) cs.getRange().maxValue).longValue());
+    assertEquals(80L, ((Number) cs.getRange().maxValue).longValue(),
+        "COUNT max should be 80 (numRows - numNulls)");
+  }
+
+  /**
+   * HIVE-29625: runs computeAggregateColumnMinMax for COUNT(DISTINCT val) against a parent
+   * column whose NDV comes from the case. A null expectedMax means no Range may be set on
+   * the aggregate column; otherwise its Range must be [0, expectedMax].
+   */
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("computeAggregateColumnMinMaxDistinctCases")
+  void testComputeAggregateColumnMinMaxDistinctByNDV(
+      String name, long parentNDV, Long expectedMax) throws SemanticException {
+    // Parent statistics with a single int column "val" carrying the NDV under test.
+    ColStatistics valStats = new ColStatistics("val", "int");
+    valStats.setNumNulls(0);
+    valStats.setCountDistint(parentNDV);
+    valStats.setRange(1, 100);
+    Statistics parentStats = new Statistics(100, 400, 400, 400);
+    parentStats.addToColumnStats(Collections.singletonList(valStats));
+
+    // Aggregation under test: DISTINCT count over t.val in COMPLETE mode.
+    ExprNodeColumnDesc valColumn = new ExprNodeColumnDesc(
+        TypeInfoFactory.intTypeInfo, "val", "t", false);
+    AggregationDesc countDistinct = new AggregationDesc();
+    countDistinct.setGenericUDAFName("count");
+    countDistinct.setParameters(Collections.singletonList(valColumn));
+    countDistinct.setDistinct(true);
+    countDistinct.setMode(GenericUDAFEvaluator.Mode.COMPLETE);
+
+    ColStatistics outStats = new ColStatistics("_col0", "bigint");
+    HiveConf hiveConf = new HiveConf();
+    StatsRulesProcFactory.GroupByStatsRule.computeAggregateColumnMinMax(
+        outStats, hiveConf, countDistinct, "bigint", parentStats);
+
+    if (expectedMax == null) {
+      assertNull(outStats.getRange(), "Range should NOT be set when DISTINCT NDV is unknown");
+      return;
+    }
+    assertNotNull(outStats.getRange(), "Range should be set when DISTINCT NDV is known");
+    assertEquals(0L, ((Number) outStats.getRange().minValue).longValue());
+    assertEquals(expectedMax.longValue(), ((Number) outStats.getRange().maxValue).longValue(),
+        "COUNT DISTINCT max should equal the parent NDV");
+  }
+
+  private static Stream<Arguments> computeAggregateColumnMinMaxDistinctCases() {
+    // Columns: display name, parent NDV, expected Range max (null = no Range expected).
+    return Stream.of(
+        Arguments.of("unknownNDVReturnsEarlyNoRange", -1L, null),
+        Arguments.of("knownNDVSetsRangeUpToNDV", 50L, 50L));
   }
 
   /**
@@ -720,7 +844,7 @@ public void testComputeAggregateColumnMinMaxWithKnownNumNulls() throws SemanticE
    * Without the fix, LEFT_OUTER_JOIN would calculate: newNumNulls = oldNumNulls + leftUnmatchedRows = -1 + 100 = 99
    */
   @Test
-  public void testUpdateNumNullsPreservesUnknownNumNulls() {
+  void testUpdateNumNullsPreservesUnknownNumNulls() {
     StatsRulesProcFactory.JoinStatsRule joinStatsRule = new StatsRulesProcFactory.JoinStatsRule();
 
     // Create ColStatistics with numNulls = -1 (unknown)
@@ -749,7 +873,101 @@ public void testUpdateNumNullsPreservesUnknownNumNulls() {
     joinStatsRule.updateNumNulls(colStats, 100L, 100L, 1000L, 0L, mockJop);
 
     // Assert that numNulls is still -1 (unchanged)
-    assertEquals("Unknown numNulls (-1) should be preserved after updateNumNulls",
-        -1L, colStats.getNumNulls());
+    assertEquals(-1L, colStats.getNumNulls(),
+        "Unknown numNulls (-1) should be preserved after updateNumNulls");
+  }
+
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("calculateUnmatchedRowsForOuterCases")
+  void testCalculateUnmatchedRowsForOuter(String name, long ndv, long distinctUnmatched, long expected) {
+    // name only labels the invocation; the check itself lives in the shared helper.
+    assertCalculateUnmatchedRowsForOuter(ndv, distinctUnmatched, expected);
+  }
+
+  private static Stream<Arguments> calculateUnmatchedRowsForOuterCases() {
+    // Columns: display name, NDV of key column k, distinctUnmatched, expected return value.
+    return Stream.of(
+        Arguments.of("distinctValUnknownReturnsInputRowCount", -1L, 5L, 100L),
+        Arguments.of("distinctValVerifiedZeroReturnsInputRowCount", 0L, 5L, 100L),
+        Arguments.of("distinctUnmatchedUnknownReturnsInputRowCount", 10L, -1L, 100L),
+        Arguments.of("distinctUnmatchedExceedsReturnsInputRowCount", 10L, 15L, 100L),
+        Arguments.of("normalCaseDivides", 10L, 2L, 20L));
+  }
+
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("computeRowCountAssumingInnerJoinCases")
+  void testComputeRowCountAssumingInnerJoin(String name, long denom, long expected) {
+    assertComputeRowCountAssumingInnerJoin(denom, expected); // name only labels the invocation
+  }
+
+  private static Stream<Arguments> computeRowCountAssumingInnerJoinCases() {
+    // Columns: display name, denominator handed to the rule, expected row count.
+    return Stream.of(
+        Arguments.of("denomPositiveDivides", 10L, 2000L),
+        Arguments.of("denomZeroClampsToOne", 0L, 20000L),
+        Arguments.of("denomNegativeClampsToOne", -1L, 20000L));
+  }
+
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("updateColStatsCases")
+  void testUpdateColStats(String name, long initialNdv, long expectedNdv) {
+    // Input statistics: one int column "k" whose NDV is supplied by the case.
+    ColStatistics keyStats = new ColStatistics("k", "int");
+    keyStats.setCountDistint(initialNdv);
+    keyStats.setNumNulls(0);
+    Statistics joinStats = new Statistics(1000, 8000, 0, 0);
+    joinStats.setColumnStats(Collections.singletonList(keyStats));
+    HiveConf hiveConf = new HiveConf();
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_JOIN_NDV_READJUSTMENT, false);
+    Map<Integer, Long> parentRowCounts = new HashMap<>();
+    parentRowCounts.put(0, 1000L);
+    // Mocked INNER_JOIN descriptor: reversed exprs map "k" to (byte) 0, join keys are empty.
+    Map<String, Byte> exprPositions = new HashMap<>();
+    exprPositions.put("k", (byte) 0);
+    JoinCondDesc innerCond = mock(JoinCondDesc.class);
+    when(innerCond.getType()).thenReturn(JoinDesc.INNER_JOIN);
+    JoinDesc desc = mock(JoinDesc.class);
+    when(desc.getReversedExprs()).thenReturn(exprPositions);
+    when(desc.getConds()).thenReturn(new JoinCondDesc[]{innerCond});
+    when(desc.getJoinKeys()).thenReturn(new ExprNodeDesc[][]{});
+    RowSchema rowSchema = mock(RowSchema.class);
+    when(rowSchema.getColumnNames()).thenReturn(Collections.singletonList("k"));
+    when(rowSchema.getSignature()).thenReturn(Collections.emptyList());
+    @SuppressWarnings("unchecked")
+    CommonJoinOperator<JoinDesc> joinOp = mock(CommonJoinOperator.class);
+    when(joinOp.getConf()).thenReturn(desc);
+    when(joinOp.getSchema()).thenReturn(rowSchema);
+
+    new StatsRulesProcFactory.JoinStatsRule().updateColStats(
+        hiveConf, joinStats, 0L, 0L, 500L, joinOp, parentRowCounts);
+    assertEquals(expectedNdv, keyStats.getCountDistint());
+  }
+
+  private static Stream<Arguments> updateColStatsCases() {
+    // Columns: display name, NDV of column k before updateColStats, NDV expected afterwards.
+    return Stream.of(
+        Arguments.of("unknownNdvSkipsMath", -1L, -1L),
+        Arguments.of("knownNdvScaledByRatio", 100L, 50L));
+  }
+
+  private void assertComputeRowCountAssumingInnerJoin(long denom, long expected) {
+    long actual = new StatsRulesProcFactory.JoinStatsRule()
+        .computeRowCountAssumingInnerJoin(Arrays.asList(100L, 200L), denom, null);
+    assertEquals(expected, actual);
+  }
+
+  private void assertCalculateUnmatchedRowsForOuter(long ndv, long distinctUnmatched, long expected) {
+    // Single int column "k" carrying the NDV under test, with numNulls set to 0.
+    ColStatistics keyStats = new ColStatistics("k", "int");
+    keyStats.setCountDistint(ndv);
+    keyStats.setNumNulls(0);
+    Statistics inputStats = new Statistics(100, 400, 0, 0);
+    inputStats.setColumnStats(Collections.singletonList(keyStats));
+    HiveConf hiveConf = new HiveConf();
+
+    long actual = new StatsRulesProcFactory.JoinStatsRule().calculateUnmatchedRowsForOuter(
+        hiveConf, 100L, Collections.singletonList("k"), inputStats, distinctUnmatched);
+
+    assertEquals(expected, actual);
   }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
index c009472fed0a..679bf28734ed 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/TestStatsUtils.java
@@ -25,6 +25,7 @@
 
 import java.lang.reflect.Field;
 import java.lang.reflect.Modifier;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Set;
@@ -32,13 +33,16 @@
 
 import org.apache.commons.lang3.reflect.FieldUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Date;
 import org.apache.hadoop.hive.metastore.api.DateColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;
+import org.apache.hadoop.hive.metastore.api.StringColumnStatsData;
 import org.apache.hadoop.hive.metastore.api.Timestamp;
 import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData;
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
@@ -285,6 +289,204 @@ void testStatisticsAddToColumnStatsPropagatesUnknownFromExisting() {
     assertEquals(-1, merged.getNumNulls(), "Unknown numNulls (-1) should be propagated when existing is unknown");
   }
 
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("addToColumnStatsCountDistinctCases")
+  void testStatisticsAddToColumnStatsCountDistinctMerge(
+      String scenarioName, long existingNdv, long incomingNdv, long expectedMergedNdv) {
+    // Seed the statistics with col1 at the existing NDV.
+    Statistics tableStats = new Statistics(1000, 8000, 0, 0);
+    tableStats.setColumnStats(Collections.singletonList(createColStats("col1", existingNdv, 0)));
+
+    // Add a second col1 entry carrying the incoming NDV.
+    tableStats.addToColumnStats(Collections.singletonList(createColStats("col1", incomingNdv, 0)));
+
+    // The NDV now stored under col1 must match the expectation for this scenario.
+    long mergedNdv = tableStats.getColumnStatisticsFromColName("col1").getCountDistint();
+    assertEquals(expectedMergedNdv, mergedNdv, "countDistinct after merge");
+  }
+
+  private static Stream<Arguments> addToColumnStatsCountDistinctCases() {
+    // Columns: display name, existing NDV, incoming NDV, NDV expected after the merge.
+    return Stream.of(
+        Arguments.of("incomingUnknownPropagates", 5L, -1L, -1L),
+        Arguments.of("existingUnknownPropagates", -1L, 5L, -1L),
+        Arguments.of("bothUnknownStaysUnknown", -1L, -1L, -1L),
+        Arguments.of("maxPicksIncomingWhenHigher", 3L, 7L, 7L),
+        Arguments.of("maxPicksExistingWhenHigher", 7L, 3L, 7L));
+  }
+
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("containsUnknownNDVCases")
+  void testContainsUnknownNDV(String scenarioName, List<Long> input, boolean expected) {
+    boolean actual = StatsUtils.containsUnknownNDV(input);
+    assertEquals(expected, actual, "containsUnknownNDV(" + input + ")");
+  }
+
+  private static Stream<Arguments> containsUnknownNDVCases() {
+    // Columns: display name, NDV list under test, expected containsUnknownNDV result.
+    return Stream.of(
+        Arguments.of("allPositive", Arrays.asList(1L, 2L, 3L), false),
+        Arguments.of("containsZero_NotUnknown", Arrays.asList(1L, 0L, 3L), false),
+        Arguments.of("singleUnknown", Arrays.asList(1L, -1L, 3L), true),
+        Arguments.of("allUnknown", Arrays.asList(-1L, -1L, -1L), true),
+        Arguments.of("firstIsUnknown_ShortCircuit", Arrays.asList(-1L, 2L, 3L), true),
+        Arguments.of("emptyList", Collections.emptyList(), false));
+  }
+
+  @Test
+  void testAddWithExpDecayReturnsUnknownWhenAnyInputIsUnknown() {
+    assertEquals(-1L, StatsUtils.addWithExpDecay(Arrays.asList(10L, -1L, 5L)),
+        "addWithExpDecay should propagate unknown NDV (-1) when present");
+  }
+
+  @Test
+  void testAddWithExpDecayComputesWhenAllInputsKnown() {
+    // Expected value: 100 * 25^(1/2) = 100 * 5 = 500 under exponential decay.
+    assertEquals(500L, StatsUtils.addWithExpDecay(Arrays.asList(100L, 25L)),
+        "addWithExpDecay should return the exponential-decay denominator for known inputs");
+  }
+
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("computeNDVGroupingColumnsCases")
+  void testComputeNDVGroupingColumns(String scenarioName, List<ColStatistics> colStats,
+      Statistics.State parentColStatsState, boolean expDecay, long expected) {
+    // Only the column-stats state of the parent varies between scenarios.
+    Statistics parent = new Statistics(1000, 8000, 0, 0);
+    parent.setColumnStatsState(parentColStatsState);
+
+    long ndvProduct = StatsUtils.computeNDVGroupingColumns(colStats, parent, expDecay);
+    assertEquals(expected, ndvProduct, scenarioName);
+  }
+
+  private static Stream<Arguments> computeNDVGroupingColumnsCases() {
+    // Columns: display name, grouping column stats, parent column-stats state, expDecay, expected.
+    return Stream.of(
+        Arguments.of("allKnownReturnsProduct",
+            Arrays.asList(makeColStat("c1", 10), makeColStat("c2", 20)),
+            Statistics.State.COMPLETE, false, 200L),
+        Arguments.of("unknownColumnReturnsMinusOne",
+            Arrays.asList(makeColStat("c1", 10), makeColStat("c2", -1)),
+            Statistics.State.COMPLETE, false, -1L),
+        Arguments.of("emptyColumnsReturnsOne",
+            Collections.<ColStatistics>emptyList(),
+            Statistics.State.COMPLETE, false, 1L),
+        Arguments.of("nullColStatWithCompleteParentSkipped",
+            Arrays.asList(null, makeColStat("c2", 10)),
+            Statistics.State.COMPLETE, false, 10L),
+        Arguments.of("nullColStatWithPartialParentReturnsMinusOne",
+            Arrays.asList((ColStatistics) null),
+            Statistics.State.PARTIAL, false, -1L),
+        Arguments.of("expDecayWithKnownInputs",
+            Arrays.asList(makeColStat("c1", 100), makeColStat("c2", 25)),
+            Statistics.State.COMPLETE, true, 500L),
+        Arguments.of("expDecayWithUnknownPropagates",
+            Arrays.asList(makeColStat("c1", 100), makeColStat("c2", -1)),
+            Statistics.State.COMPLETE, true, -1L));
+  }
+
+  private static ColStatistics makeColStat(String name, long ndv) {
+    ColStatistics stringCol = new ColStatistics(name, "string");
+    stringCol.setCountDistint(ndv);
+    stringCol.setNumNulls(0);
+    return stringCol;
+  }
+
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("getColStatisticsUnsetNumDVsCases")
+  void testGetColStatisticsReturnsUnknownNDVWhenNumDVsNotSet(
+      String typeName, ColumnStatisticsData data) {
+    // Metastore stats object for test_col, typed and populated from the case.
+    ColumnStatisticsObj metastoreStats = new ColumnStatisticsObj();
+    metastoreStats.setColName("test_col");
+    metastoreStats.setColType(typeName);
+    metastoreStats.setStatsData(data);
+
+    ColStatistics converted = StatsUtils.getColStatistics(metastoreStats, "test_col");
+    assertNotNull(converted, "ColStatistics should not be null for " + typeName);
+    assertEquals(-1, converted.getCountDistint(),
+        "When numDVs is unset for " + typeName + ", NDV should be -1");
+  }
+
+  private static Stream<Arguments> getColStatisticsUnsetNumDVsCases() {
+    // None of the stats objects below calls setNumDVs; only the other fields are filled in.
+    LongColumnStatsData bigintData = new LongColumnStatsData();
+    bigintData.setNumNulls(10);
+
+    DoubleColumnStatsData doubleData = new DoubleColumnStatsData();
+    doubleData.setNumNulls(10);
+
+    StringColumnStatsData stringData = new StringColumnStatsData();
+    stringData.setNumNulls(10);
+    stringData.setAvgColLen(5.0);
+    stringData.setMaxColLen(20);
+
+    BinaryColumnStatsData binaryData = new BinaryColumnStatsData();
+    binaryData.setNumNulls(10);
+    binaryData.setAvgColLen(5.0);
+    binaryData.setMaxColLen(20);
+
+    TimestampColumnStatsData timestampData = new TimestampColumnStatsData();
+    timestampData.setNumNulls(10);
+
+    DecimalColumnStatsData decimalData = new DecimalColumnStatsData();
+    decimalData.setNumNulls(10);
+
+    DateColumnStatsData dateData = new DateColumnStatsData();
+    dateData.setNumNulls(10);
+
+    return Stream.of(
+        Arguments.of(serdeConstants.BIGINT_TYPE_NAME, wrapLong(bigintData)),
+        Arguments.of(serdeConstants.DOUBLE_TYPE_NAME, wrapDouble(doubleData)),
+        Arguments.of(serdeConstants.STRING_TYPE_NAME, wrapString(stringData)),
+        Arguments.of(serdeConstants.BINARY_TYPE_NAME, wrapBinary(binaryData)),
+        Arguments.of(serdeConstants.TIMESTAMP_TYPE_NAME, wrapTimestamp(timestampData)),
+        Arguments.of(serdeConstants.DECIMAL_TYPE_NAME, wrapDecimal(decimalData)),
+        Arguments.of(serdeConstants.DATE_TYPE_NAME, wrapDate(dateData))
+    );
+  }
+
+  private static ColumnStatisticsData wrapLong(LongColumnStatsData s) {
+    ColumnStatisticsData wrapped = new ColumnStatisticsData();
+    wrapped.setLongStats(s);
+    return wrapped;
+  }
+
+  private static ColumnStatisticsData wrapDouble(DoubleColumnStatsData s) {
+    ColumnStatisticsData wrapped = new ColumnStatisticsData();
+    wrapped.setDoubleStats(s);
+    return wrapped;
+  }
+
+  private static ColumnStatisticsData wrapString(StringColumnStatsData s) {
+    ColumnStatisticsData wrapped = new ColumnStatisticsData();
+    wrapped.setStringStats(s);
+    return wrapped;
+  }
+
+  private static ColumnStatisticsData wrapBinary(BinaryColumnStatsData s) {
+    ColumnStatisticsData wrapped = new ColumnStatisticsData();
+    wrapped.setBinaryStats(s);
+    return wrapped;
+  }
+
+  private static ColumnStatisticsData wrapTimestamp(TimestampColumnStatsData s) {
+    ColumnStatisticsData wrapped = new ColumnStatisticsData();
+    wrapped.setTimestampStats(s);
+    return wrapped;
+  }
+
+  private static ColumnStatisticsData wrapDecimal(DecimalColumnStatsData s) {
+    ColumnStatisticsData wrapped = new ColumnStatisticsData();
+    wrapped.setDecimalStats(s);
+    return wrapped;
+  }
+
+  private static ColumnStatisticsData wrapDate(DateColumnStatsData s) {
+    ColumnStatisticsData wrapped = new ColumnStatisticsData();
+    wrapped.setDateStats(s);
+    return wrapped;
+  }
+
   @Test
   void testGetColStatisticsBooleanWithUnknownNumTrues() {
     ColumnStatisticsObj cso = new ColumnStatisticsObj();
@@ -465,6 +667,53 @@ void testUpdateStatsPreservesUnknownNumNulls() {
     assertEquals(-1, updated.getNumNulls(), "Unknown numNulls (-1) should be preserved after scaling");
   }
 
+  @Test
+  void testUpdateStatsMarksFilteredColumnEvenWhenNDVUnknown() {
+    // HIVE-29625: col1 is passed as an affected column while its NDV is unknown (-1).
+    // The filter flag must still be set and the -1 sentinel must survive updateStats.
+    Statistics tableStats = new Statistics(1000, 8000, 0, 0);
+    tableStats.setColumnStats(Collections.singletonList(createColStats("col1", -1, 0)));
+
+    StatsUtils.updateStats(tableStats, 500, true, null, Collections.singleton("col1"));
+
+    ColStatistics col1 = tableStats.getColumnStats().get(0);
+    boolean filtered = col1.isFilteredColumn();
+    long ndv = col1.getCountDistint();
+    assertEquals(true, filtered, "Filter-column flag should be set even when NDV is unknown");
+    assertEquals(-1, ndv,
+        "Unknown NDV (-1) should be preserved when affected column has no NDV");
+  }
+
+  @Test
+  void testUpdateStatsRecomputesNDVWhenAffectedAndKnown() {
+    // Regression check: col1 is an affected column with a known NDV of 100 while the row
+    // count drops from 1000 to 500; it must be flagged as filtered and its NDV scaled.
+    Statistics tableStats = new Statistics(1000, 8000, 0, 0);
+    tableStats.setColumnStats(Collections.singletonList(createColStats("col1", 100, 0)));
+
+    StatsUtils.updateStats(tableStats, 500, true, null, Collections.singleton("col1"));
+
+    ColStatistics col1 = tableStats.getColumnStats().get(0);
+    boolean filtered = col1.isFilteredColumn();
+    long ndv = col1.getCountDistint();
+    assertEquals(true, filtered,
+        "Filter-column flag should be set for affected column with known NDV");
+    // ratio = 500/1000 = 0.5 -> newDV = ceil(0.5 * 100) = 50
+    assertEquals(50, ndv, "Known NDV should be scaled by the row-count ratio");
+  }
+
+  @Test
+  void testScaleColStatisticsPreservesUnknownCountDistint() {
+    // HIVE-29625: scaling by a factor below 1.0 must leave an unknown NDV (-1) untouched.
+    List<ColStatistics> scaled = Collections.singletonList(createColStats("col1", -1, 0));
+
+    StatsUtils.scaleColStatistics(scaled, 0.5);
+
+    long ndvAfterScaling = scaled.get(0).getCountDistint();
+    assertEquals(-1, ndvAfterScaling,
+        "Unknown NDV (-1) should be preserved when factor < 1.0");
+  }
+
   @Test
   void testScaleColStatisticsPreservesUnknownNumNulls() {
     ColStatistics cs = createColStats("col1", 100, -1); // unknown numNulls
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java
index 98bc589e40d3..752907f755d8 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/stats/estimator/TestPessimisticStatCombiner.java
@@ -20,8 +20,13 @@
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
+import java.util.stream.Stream;
+
 import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
 
 class TestPessimisticStatCombiner {
 
@@ -136,6 +141,31 @@ void testCombineBothUnknownNumNulls() {
     assertEquals(-1, combined.getNumNulls(), "Both unknown should result in unknown (-1)");
   }
 
+  @ParameterizedTest(name = "{0}")
+  @MethodSource("combineCountDistinctCases")
+  void testCombineCountDistinctMerge(String scenarioName, long stat1Ndv, long stat2Ndv, long expectedNdv) {
+    ColStatistics first = createStat("col1", "int", stat1Ndv, 5, 4.0);
+    ColStatistics second = createStat("col2", "int", stat2Ndv, 10, 4.0);
+
+    // Feed both stats into a fresh combiner, in order, then read back the combined NDV.
+    PessimisticStatCombiner statCombiner = new PessimisticStatCombiner();
+    statCombiner.add(first);
+    statCombiner.add(second);
+
+    long combinedNdv = statCombiner.getResult().get().getCountDistint();
+    assertEquals(expectedNdv, combinedNdv, "countDistinct after PROPAGATE combine");
+  }
+
+  private static Stream<Arguments> combineCountDistinctCases() {
+    // Columns: display name, NDV of the first stat, NDV of the second stat, expected combined NDV.
+    return Stream.of(
+        Arguments.of("firstUnknownPropagates", -1L, 50L, -1L),
+        Arguments.of("secondUnknownPropagates", 50L, -1L, -1L),
+        Arguments.of("bothUnknownStaysUnknown", -1L, -1L, -1L),
+        Arguments.of("picksHigherWhenSecondHigher", 30L, 50L, 50L),
+        Arguments.of("keepsHigherWhenFirstHigher", 50L, 30L, 50L));
+  }
+
   @Test
   void testCombineBothUnknownNumTruesAndNumFalses() {
     ColStatistics stat1 = createStat("col1", "boolean", 2, 5, 1.0);
diff --git a/ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out
index d4d9cb53e2b9..0b708705624b 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_types_non_dictionary_encoding_vectorization.q.out
@@ -2414,13 +2414,13 @@ STAGE PLANS:
                       minReductionHashAggr: 0.99
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 150 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: binary)
                         null sort order: z
                         sort order: +
                         Map-reduce partition columns: _col0 (type: binary)
-                        Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 150 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
@@ -2432,16 +2432,16 @@ STAGE PLANS:
                 keys: KEY._col0 (type: binary)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 75 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: hex(_col0) (type: string), _col1 (type: bigint), _col0 (type: binary)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 75 Data size: 14800 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col2 (type: binary)
                     null sort order: z
                     sort order: +
-                    Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 75 Data size: 14800 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col0 (type: string), _col1 (type: bigint)
         Reducer 3 
             Execution mode: vectorized, llap
@@ -2449,10 +2449,10 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 75 Data size: 14400 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 75 Data size: 14400 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
index 41bc14e5e354..8e8d1d548e0e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out
@@ -137,7 +137,7 @@ STAGE PLANS:
                 TableScan
                   alias: t1
                   filterExpr: bin is not null (type: boolean)
-                  probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:bin, smallTablePos:1, keyRatio:0.0
+                  probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_30_container, bigKeyColName:bin, smallTablePos:1, keyRatio:100.0
                   Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true