diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DataTrait.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DataTrait.java index 2e367dc8838ac0..c9c0a96869cc73 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DataTrait.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/DataTrait.java @@ -48,7 +48,7 @@ * then values in column 'b' must also be identical. */ public class DataTrait { - + public static int UNIQUE_UNION_LIMIT = 16; public static final DataTrait EMPTY_TRAIT = new DataTrait(new UniqueDescription().toImmutable(), new UniformDescription().toImmutable(), new ImmutableSet.Builder().build(), @@ -138,6 +138,20 @@ public boolean isNullSafeEqual(Slot l, Slot r) { return equalSet.isEqual(l, r); } + /** + * Get all unique slot sets, including those containing nullable slots. + * Each returned set is a unique key of the relation (no two rows share the same value combination, + * under SQL's "NULL is distinct from NULL" semantics in the UNIQUE-set sense). + */ + public List> getAllUniqueSets() { + List> res = new ArrayList<>(uniqueSet.uniqueSlots.size() + uniqueSet.combinedUniqueSlotSet.size()); + for (Slot slot : uniqueSet.uniqueSlots) { + res.add(ImmutableSet.of(slot)); + } + res.addAll(uniqueSet.combinedUniqueSlotSet); + return res; + } + public FuncDeps getAllValidFuncDeps(Set validSlots) { return fdDg.findValidFuncDeps(validSlots); } @@ -268,23 +282,23 @@ public void addDepsByEqualSet(Set equalSet) { /** * Extends a unique slot using an equivalence set. - * Within slots, if any slot in the equivalence set is unique, + * Within uniqueSlots, if any slot in the equivalence set is unique, * then all slots in the set are considered unique. - * For slotSets, if there is an intersection with the equivalence set, + * For combinedUniqueSlotSet, if there is an intersection with the equivalence set, * the slotSet can be substituted with the equivalence set. 
* Example: * Given an equivalence set {a1, a2, a3} and a uniqueSet {a1, b1, c1}, * the sets {a2, b1, c1} and {a3, b1, c1} are also treated as unique. */ public void addUniqueByEqualSet(Set equalSet) { - if (uniqueSet.isIntersect(equalSet, uniqueSet.slots)) { - uniqueSet.slots.addAll(equalSet); + if (uniqueSet.isIntersect(equalSet, uniqueSet.uniqueSlots)) { + uniqueSet.uniqueSlots.addAll(equalSet); return; } - for (Set slotSet : uniqueSet.slotSets) { - Set intersection = Sets.intersection(equalSet, uniqueSet.slots); + for (Set slotSet : uniqueSet.combinedUniqueSlotSet) { + Set intersection = Sets.intersection(equalSet, uniqueSet.uniqueSlots); if (intersection.size() > 2) { - uniqueSet.slotSets.remove(slotSet); + uniqueSet.combinedUniqueSlotSet.remove(slotSet); slotSet.removeAll(intersection); for (Slot slot : equalSet) { ImmutableSet uniqueSlotSet @@ -330,12 +344,12 @@ public List> calEqualSetList() { */ public List> getAllUniqueAndNotNull() { List> res = new ArrayList<>(); - for (Slot slot : uniqueSet.slots) { + for (Slot slot : uniqueSet.uniqueSlots) { if (!slot.nullable()) { res.add(ImmutableSet.of(slot)); } } - for (Set slotSet : uniqueSet.slotSets) { + for (Set slotSet : uniqueSet.combinedUniqueSlotSet) { boolean containsNullable = false; for (Slot slot : slotSet) { if (slot.nullable()) { @@ -380,7 +394,7 @@ public DataTrait build() { public void pruneSlots(Set outputSlots) { uniformSet.removeNotContain(outputSlots); - uniqueSet.removeNotContain(outputSlots); + uniqueSet.removeNotContain(outputSlots, equalSetBuilder.build()); equalSetBuilder.removeNotContain(outputSlots); fdDgBuilder.removeNotContain(outputSlots); } @@ -404,65 +418,99 @@ public void replaceEqualSetBy(Map replaceMap) { public void replaceFuncDepsBy(Map replaceMap) { fdDgBuilder.replace(replaceMap); } + + /** Replace each slot in uniqueSet.combinedUniqueSlotSet with its equal-set root, if it has one. */ + public void normalizeUniqueSetsToEqualSetRoot() { + ImmutableEqualSet equalSet = equalSetBuilder.build(); + Set> newSlotSets = new 
HashSet<>(); + for (ImmutableSet uniqueSet : uniqueSet.combinedUniqueSlotSet) { + ImmutableSet.Builder roots = ImmutableSet.builder(); + for (Slot slot : uniqueSet) { + Slot root = equalSet.getRoot(slot); + if (root != null) { + roots.add(root); + } else { + roots.add(slot); + } + } + newSlotSets.add(roots.build()); + } + uniqueSet.combinedUniqueSlotSet = newSlotSets; + } } static class UniqueDescription { - Set slots; - Set> slotSets; + Set uniqueSlots; + Set> combinedUniqueSlotSet; UniqueDescription() { - slots = new HashSet<>(); - slotSets = new HashSet<>(); + uniqueSlots = new HashSet<>(); + combinedUniqueSlotSet = new HashSet<>(); } UniqueDescription(UniqueDescription o) { - this.slots = new HashSet<>(o.slots); - this.slotSets = new HashSet<>(o.slotSets); + this.uniqueSlots = new HashSet<>(o.uniqueSlots); + this.combinedUniqueSlotSet = new HashSet<>(o.combinedUniqueSlotSet); } UniqueDescription(Set slots, Set> slotSets) { - this.slots = slots; - this.slotSets = slotSets; + this.uniqueSlots = slots; + this.combinedUniqueSlotSet = slotSets; } public boolean contains(Slot slot) { - return slots.contains(slot); + return uniqueSlots.contains(slot); } public boolean contains(Set slotSet) { if (slotSet.size() == 1) { - return slots.contains(slotSet.iterator().next()); + return uniqueSlots.contains(slotSet.iterator().next()); } - return slotSets.contains(ImmutableSet.copyOf(slotSet)); + return combinedUniqueSlotSet.contains(ImmutableSet.copyOf(slotSet)); } public boolean containsAnySub(Set slotSet) { - return slotSet.stream().anyMatch(s -> slots.contains(s)) - || slotSets.stream().anyMatch(slotSet::containsAll); + return slotSet.stream().anyMatch(s -> uniqueSlots.contains(s)) + || combinedUniqueSlotSet.stream().anyMatch(slotSet::containsAll); } - public void removeNotContain(Set slotSet) { - if (!slotSet.isEmpty()) { - Set newSlots = Sets.newLinkedHashSetWithExpectedSize(slots.size()); - for (Slot slot : slots) { - if (slotSet.contains(slot)) { + public void 
removeNotContain(Set outputSlots, ImmutableEqualSet equalSet) { + if (!outputSlots.isEmpty()) { + Set newSlots = Sets.newLinkedHashSetWithExpectedSize(uniqueSlots.size()); + for (Slot slot : uniqueSlots) { + if (outputSlots.contains(slot)) { newSlots.add(slot); } } - this.slots = newSlots; - - Set> newSlotSets = Sets.newLinkedHashSetWithExpectedSize(slots.size()); - for (ImmutableSet set : slotSets) { - if (slotSet.containsAll(set)) { - newSlotSets.add(set); + this.uniqueSlots = newSlots; + + Set> newCombinedUniqueSlotSet = Sets.newHashSetWithExpectedSize(uniqueSlots.size()); + for (ImmutableSet combinedUniqueSlots : combinedUniqueSlotSet) { + ImmutableSet.Builder builder = ImmutableSet.builder(); + boolean allCanFindReplacement = true; + for (Slot slot : combinedUniqueSlots) { + if (outputSlots.contains(slot)) { + builder.add(slot); + } else { + Set equalSlots = equalSet.calEqualSet(slot); + Set replaceSlots = Sets.intersection(outputSlots, equalSlots); + if (!replaceSlots.isEmpty()) { + builder.add(replaceSlots.iterator().next()); + } else { + allCanFindReplacement = false; + } + } + } + if (allCanFindReplacement) { + newCombinedUniqueSlotSet.add(builder.build()); } } - this.slotSets = newSlotSets; + this.combinedUniqueSlotSet = newCombinedUniqueSlotSet; } } public void add(Slot slot) { - slots.add(slot); + uniqueSlots.add(slot); } public void add(ImmutableSet slotSet) { @@ -470,15 +518,15 @@ public void add(ImmutableSet slotSet) { return; } if (slotSet.size() == 1) { - slots.add(slotSet.iterator().next()); + uniqueSlots.add(slotSet.iterator().next()); return; } - slotSets.add(slotSet); + combinedUniqueSlotSet.add(slotSet); } public void add(UniqueDescription uniqueDescription) { - slots.addAll(uniqueDescription.slots); - slotSets.addAll(uniqueDescription.slotSets); + uniqueSlots.addAll(uniqueDescription.uniqueSlots); + combinedUniqueSlotSet.addAll(uniqueDescription.combinedUniqueSlotSet); } public boolean isIntersect(Set set1, Set set2) { @@ -496,26 +544,26 @@ 
public boolean isIntersect(Set set1, Set set2) { } public boolean isEmpty() { - return slots.isEmpty() && slotSets.isEmpty(); + return uniqueSlots.isEmpty() && combinedUniqueSlotSet.isEmpty(); } @Override public String toString() { - return "{" + slots + slotSets + "}"; + return "{" + uniqueSlots + combinedUniqueSlotSet + "}"; } public void replace(Map replaceMap) { - slots = slots.stream() + uniqueSlots = uniqueSlots.stream() .map(s -> replaceMap.getOrDefault(s, s)) .collect(Collectors.toSet()); - slotSets = slotSets.stream() + combinedUniqueSlotSet = combinedUniqueSlotSet.stream() .map(set -> set.stream().map(s -> replaceMap.getOrDefault(s, s)) .collect(ImmutableSet.toImmutableSet())) .collect(Collectors.toSet()); } public UniqueDescription toImmutable() { - return new UniqueDescription(ImmutableSet.copyOf(slots), ImmutableSet.copyOf(slotSets)); + return new UniqueDescription(ImmutableSet.copyOf(uniqueSlots), ImmutableSet.copyOf(combinedUniqueSlotSet)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java index 7d174ff28bc0f1..07e6b77728612b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/Plan.java @@ -281,6 +281,8 @@ default DataTrait computeDataTrait() { computeEqualSet(fdBuilder); computeFd(fdBuilder); + fdBuilder.normalizeUniqueSetsToEqualSetRoot(); + for (Slot slot : getOutput()) { Set o = ImmutableSet.of(slot); // all slots dependent unique slot @@ -298,6 +300,7 @@ default DataTrait computeDataTrait() { fdBuilder.addUniformByEqualSet(validEqualSet); fdBuilder.addUniqueByEqualSet(validEqualSet); } + Set output = this.getOutputSet(); for (Plan child : children()) { if (!output.containsAll(child.getOutputSet())) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java index f2867ea6d6c0da..822a59e635d55d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalJoin.java @@ -21,6 +21,7 @@ import org.apache.doris.nereids.hint.DistributeHint; import org.apache.doris.nereids.jobs.joinorder.hypergraph.bitmap.LongBitmap; import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.DataTrait; import org.apache.doris.nereids.properties.DataTrait.Builder; import org.apache.doris.nereids.properties.LogicalProperties; import org.apache.doris.nereids.rules.exploration.join.JoinReorderContext; @@ -635,6 +636,37 @@ public void computeUnique(Builder builder) { } else if (joinType.isRightSemiOrAntiJoin() || joinType.isAsofRightJoin()) { builder.addUniqueSlot(right().getLogicalProperties().getTrait()); } + + // Union propagation: + // For INNER / CROSS / LEFT_OUTER / RIGHT_OUTER / FULL_OUTER joins, if the left side + // is unique on U_L and the right side is unique on U_R, then the join output is unique + // on U_L ∪ U_R. + // Proof sketch (INNER): two output rows (l_a, r_a) and (l_b, r_b) agreeing on U_L ∪ U_R + // must agree on U_L (so l_a = l_b by L's uniqueness on U_L) and on U_R (so r_a = r_b), + // hence the two rows are identical. 
+ if (joinType.isInnerJoin() || joinType.isCrossJoin() + || joinType.isLeftOuterJoin() || joinType.isRightOuterJoin() + || joinType.isFullOuterJoin()) { + List> leftUniqueSets = + left().getLogicalProperties().getTrait().getAllUniqueSets(); + List> rightUniqueSets = + right().getLogicalProperties().getTrait().getAllUniqueSets(); + if (!leftUniqueSets.isEmpty() && !rightUniqueSets.isEmpty()) { + int count = 0; + outer: + for (Set leftUnique : leftUniqueSets) { + for (Set rightUnique : rightUniqueSets) { + if (count >= DataTrait.UNIQUE_UNION_LIMIT) { + break outer; + } + builder.addUniqueSlot(ImmutableSet.builder() + .addAll(leftUnique).addAll(rightUnique).build()); + count++; + } + } + } + } + // if there is non-equal join conditions, don't propagate unique if (hashJoinConjuncts.isEmpty()) { return; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ImmutableEqualSet.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ImmutableEqualSet.java index 884cef8a5f71b2..de7b6ea613b963 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ImmutableEqualSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/ImmutableEqualSet.java @@ -48,6 +48,7 @@ public static ImmutableEqualSet empty() { */ public static class Builder { private Map parent; + private ImmutableEqualSet built; Builder(Map parent) { this.parent = parent; @@ -65,6 +66,7 @@ public Builder(ImmutableEqualSet equalSet) { * replace all key value according replace map */ public void replace(Map replaceMap) { + built = null; Map newMap = new LinkedHashMap<>(); for (Entry entry : parent.entrySet()) { newMap.put(replaceMap.getOrDefault(entry.getKey(), entry.getKey()), @@ -78,6 +80,7 @@ public void replace(Map replaceMap) { * @param containSet the set to contain */ public void removeNotContain(Set containSet) { + built = null; List> equalSetList = calEqualSetList(); this.parent.clear(); for (Set equalSet : equalSetList) { @@ -98,6 +101,7 @@ public void 
removeNotContain(Set containSet) { * Add a equal pair */ public void addEqualPair(T a, T b) { + built = null; if (!parent.containsKey(a)) { parent.put(a, a); } @@ -136,6 +140,7 @@ public List> calEqualSetList() { } public void addEqualSet(ImmutableEqualSet equalSet) { + built = null; this.parent.putAll(equalSet.root); } @@ -146,15 +151,23 @@ private T findRoot(T a) { return findRoot(parent.get(a)); } + /** Build the immutable equal set; the result is cached until the builder is modified. */ public ImmutableEqualSet build() { - ImmutableMap.Builder foldMapBuilder = new ImmutableMap.Builder<>(); - for (T k : parent.keySet()) { - foldMapBuilder.put(k, findRoot(k)); + if (built == null) { + ImmutableMap.Builder foldMapBuilder = new ImmutableMap.Builder<>(); + for (T k : parent.keySet()) { + foldMapBuilder.put(k, findRoot(k)); + } + built = new ImmutableEqualSet<>(foldMapBuilder.build()); } - return new ImmutableEqualSet<>(foldMapBuilder.build()); + return built; } } + public T getRoot(T node) { + return root.get(node); + } + /** * Calculate equal set for a except self */ diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java index d91a0ed9eb93d5..de7167af2c76bd 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniqueTest.java @@ -266,26 +266,31 @@ void testJoin() { Assertions.assertTrue(plan.getLogicalProperties() .getTrait().isUniqueAndNotNull(plan.getOutput().get(1))); + // Even though the hash key (uni.name) is not unique, the join output is still unique on + // {agg.id, uni.id} because both inputs are unique on their respective ids + // (union propagation: U_L ∪ U_R is unique). 
plan = PlanChecker.from(connectContext) .analyze("select uni.id, agg.id from agg inner join uni " + "on agg.id = uni.name") .rewrite() .getPlan(); - Assertions.assertFalse(plan.getLogicalProperties() + Assertions.assertTrue(plan.getLogicalProperties() .getTrait().isUnique(plan.getOutputSet())); + // Non-equal join condition: union propagation does not depend on hash equi conjuncts. plan = PlanChecker.from(connectContext) .analyze("select uni.id, agg.id from agg inner join uni " + "on agg.id < uni.id") .rewrite() .getPlan(); - Assertions.assertFalse(plan.getLogicalProperties() + Assertions.assertTrue(plan.getLogicalProperties() .getTrait().isUnique(plan.getOutputSet())); + // Disjunctive condition: still unique on {agg.id, uni.id} by union propagation. plan = PlanChecker.from(connectContext) .analyze("select uni.id, agg.id from agg inner join uni " + "on agg.id = uni.id or agg.name > uni.name") .rewrite() .getPlan(); - Assertions.assertFalse(plan.getLogicalProperties() + Assertions.assertTrue(plan.getLogicalProperties() .getTrait().isUnique(plan.getOutputSet())); plan = PlanChecker.from(connectContext) .analyze("select uni.id, agg.id from agg inner join uni " @@ -298,6 +303,81 @@ void testJoin() { .getTrait().isUniqueAndNotNull(plan.getOutput().get(1))); } + @Test + void testJoinUniqueUnionPropagation() { + // INNER: union propagation { agg.id } ∪ { uni.id } -> output unique on {agg.id, uni.id} + // even when hash key (uni.name) is not unique on its side + Plan plan = PlanChecker.from(connectContext) + .analyze("select agg.id, uni.id from agg inner join uni on agg.id = uni.name") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getTrait().isUnique(plan.getOutputSet())); + + // CROSS: union propagation does not depend on equi conjuncts + plan = PlanChecker.from(connectContext) + .analyze("select agg.id, uni.id from agg cross join uni") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + 
.getTrait().isUnique(plan.getOutputSet())); + + // LEFT OUTER: union propagation also applies (NULL distinct in unique-set semantics) + plan = PlanChecker.from(connectContext) + .analyze("select agg.id, uni.id from agg left outer join uni on agg.id = uni.name") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getTrait().isUnique(plan.getOutputSet())); + + // RIGHT OUTER + plan = PlanChecker.from(connectContext) + .analyze("select agg.id, uni.id from agg right outer join uni on agg.id = uni.name") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getTrait().isUnique(plan.getOutputSet())); + + // FULL OUTER + plan = PlanChecker.from(connectContext) + .analyze("select agg.id, uni.id from agg full outer join uni on agg.id = uni.name") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getTrait().isUnique(plan.getOutputSet())); + + // Equal-set canonicalization: agg.id = uni.id, output {agg.id} alone should be unique + // because { agg.id, uni.id } gets canonicalized via the equal set so a single + // representative slot in the output identifies the row. 
+ plan = PlanChecker.from(connectContext) + .analyze("select t1.id2, t1.name,t2.name from (select distinct id2,name from agg) t1 inner join (select distinct id2,name from uni) t2 on t1.id2 = t2.id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getTrait().isUnique(plan.getOutputSet())); + plan = PlanChecker.from(connectContext) + .analyze("select t2.id2, t1.name,t2.name,t3.name from (select distinct id2,name from agg) t1 inner join (select distinct id2,name from uni) t2 on t1.id2 = t2.id2 inner join (select distinct id2,name from uni) t3 on t2.id2=t3.id2") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getTrait().isUnique(plan.getOutputSet())); + plan = PlanChecker.from(connectContext) + .analyze("select t3.id2,t2.id2, t1.name,t2.name from (select distinct id2,name from agg) t1 inner join (select distinct id2,name from uni) t2 on t1.id2 = t2.id2 inner join (select distinct id2,name from uni) t3 on t2.name=t3.name") + .rewrite() + .getPlan(); + Assertions.assertTrue(plan.getLogicalProperties() + .getTrait().isUnique(plan.getOutputSet())); + + // Negative: when output drops both join keys and lacks any unique combination + // covering both sides, it must NOT be considered unique. + plan = PlanChecker.from(connectContext) + .analyze("select agg.name, uni.name from agg inner join uni on agg.id = uni.id") + .rewrite() + .getPlan(); + Assertions.assertFalse(plan.getLogicalProperties() + .getTrait().isUnique(plan.getOutputSet())); + } + @Test void testOneRowRelation() { Plan plan = PlanChecker.from(connectContext)