diff --git a/src/libraries/Common/src/System/Collections/Generic/BitHelper.cs b/src/libraries/Common/src/System/Collections/Generic/BitHelper.cs index 7cf502c552ecdd..b2912e0822335a 100644 --- a/src/libraries/Common/src/System/Collections/Generic/BitHelper.cs +++ b/src/libraries/Common/src/System/Collections/Generic/BitHelper.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Diagnostics; +using System.Numerics; namespace System.Collections.Generic { @@ -33,6 +34,26 @@ internal void MarkBit(int bitPosition) } } + internal bool TryMarkBit(int bitPosition) + { + Debug.Assert(bitPosition >= 0); + + uint bitArrayIndex = (uint)bitPosition / IntSize; + + // Workaround for https://github.com/dotnet/runtime/issues/72004 + Span span = _span; + if (bitArrayIndex < (uint)span.Length) + { + int bits = span[(int)bitArrayIndex]; + if ((bits & (1 << ((int)((uint)bitPosition % IntSize)))) == 0) + { + span[(int)bitArrayIndex] = bits | (1 << ((int)((uint)bitPosition % IntSize))); + return true; + } + } + return false; + } + internal bool IsMarked(int bitPosition) { Debug.Assert(bitPosition >= 0); @@ -46,7 +67,26 @@ internal bool IsMarked(int bitPosition) (span[(int)bitArrayIndex] & (1 << ((int)((uint)bitPosition % IntSize)))) != 0; } + internal bool IsUnmarked(int bitPosition) + { + Debug.Assert(bitPosition >= 0); + + uint bitArrayIndex = (uint)bitPosition / IntSize; + + // Workaround for https://github.com/dotnet/runtime/issues/72004 + ReadOnlySpan span = _span; + return + bitArrayIndex < (uint)span.Length && + (span[(int)bitArrayIndex] & (1 << ((int)((uint)bitPosition % IntSize)))) == 0; + } + + internal int FindFirstUnmarked() + { + int i = _span.IndexOfAnyExcept(~0); + return i < 0 ? -1 : i * IntSize + BitOperations.TrailingZeroCount(~_span[i]); + } + /// How many ints must be allocated to represent n bits. Returns (n+31)/32, but avoids overflow. - internal static int ToIntArrayLength(int n) => n > 0 ? ((n - 1) / IntSize + 1) : 0; + internal static int ToIntArrayLength(int n) => (int)(((uint)n + 31) / IntSize); } } diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs index 14cc4bb26b3c67..e990cbb2c4f74d 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSet.cs @@ -277,6 +277,57 @@ private int FindItemIndex(T item) return -1; } + private bool Contains(ref Entry item) + { + Entry[] entries = _entries!; + uint collisionCount = 0; + IEqualityComparer? comparer = _comparer; + + int hashCode = item.HashCode; + int i = GetBucketRef(hashCode) - 1; // Value in _buckets is 1-based + + // comparer can only be null for value types; enable JIT to eliminate entire if block for ref types + if (typeof(T).IsValueType && comparer == null) + { + while (i >= 0) + { + ref Entry entry = ref entries[i]; + if (entry.HashCode == hashCode && EqualityComparer.Default.Equals(entry.Value, item.Value)) + { + return true; + } + i = entry.Next; + + if (++collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop, which means a concurrent update has happened. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + else + { + Debug.Assert(comparer is not null); + while (i >= 0) + { + ref Entry entry = ref entries[i]; + if (entry.HashCode == hashCode && comparer.Equals(entry.Value, item.Value)) + { + return true; + } + i = entry.Next; + + if (++collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop, which means a concurrent update has happened. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + + return false; + } + /// Gets a reference to the specified hashcode's bucket, containing an index into . [MethodImpl(MethodImplOptions.AggressiveInlining)] private ref int GetBucketRef(int hashCode) @@ -353,6 +404,53 @@ public bool Remove(T item) return false; } + private void RemoveAt(Entry[] entries, int removeIndex) + { + ref Entry entry = ref entries[removeIndex]; + ref int bucket = ref GetBucketRef(entry.HashCode); + int i = bucket - 1; // Value in buckets is 1-based + if (i == removeIndex) + { + bucket = entry.Next + 1; // Value in buckets is 1-based + } + else + { + uint collisionCount = 0; + while (true) + { + if ((uint)i >= (uint)entries.Length) + { + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + + ref Entry last = ref entries[i]; + i = last.Next; + if (i == removeIndex) + { + last.Next = entry.Next; + break; + } + + if (++collisionCount > (uint)entries.Length) + { + // The chain of entries forms a loop; which means a concurrent update has happened. + // Break out of the loop and throw, rather than looping forever. + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + } + } + } + + if (RuntimeHelpers.IsReferenceOrContainsReferences()) + { + entry.Value = default!; + } + + Debug.Assert((StartOfFreeList - _freeList) < 0, "shouldn't underflow because max hashtable length is MaxPrimeArrayLength = 0x7FEFFFFD(2146435069) _freelist underflow threshold 2147483646"); + entry.Next = StartOfFreeList - _freeList; + _freeList = i; + _freeCount++; + } + /// Gets the number of elements that are contained in the set. public int Count => _count - _freeCount; @@ -957,8 +1055,8 @@ public bool IsSubsetOf(IEnumerable other) } } - (int uniqueCount, int unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false); - return uniqueCount == Count && unfoundCount >= 0; + (int uniqueCount, _) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false); + return uniqueCount == Count; } /// Determines whether a object is a proper subset of the specified collection. @@ -1001,11 +1099,11 @@ public bool IsProperSubsetOf(IEnumerable other) } } - (int uniqueCount, int unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: false); + (int uniqueCount, int unfoundCount) = CheckUniqueAndUnfoundElements(other, returnIfUnfound: Count == 0); return uniqueCount == Count && unfoundCount > 0; } - /// Determines whether a object is a proper superset of the specified collection. + /// Determines whether a object is a superset of the specified collection. /// The collection to compare to the current object. /// true if the object is a superset of ; otherwise, false. public bool IsSupersetOf(IEnumerable other) @@ -1030,12 +1128,10 @@ public bool IsSupersetOf(IEnumerable other) return true; } - // Try to compare based on counts alone if other is a hashset with same equality comparer. - if (other is HashSet otherAsSet && - EqualityComparersAreEqual(this, otherAsSet) && - otherAsSet.Count > Count) + // Faster if other is a hashset with the same equality comparer + if (other is HashSet otherAsSet && EqualityComparersAreEqual(this, otherAsSet)) { - return false; + return otherAsSet.Count <= Count && otherAsSet.IsSubsetOfHashSetWithSameComparer(this); } } @@ -1114,6 +1210,18 @@ public bool Overlaps(IEnumerable other) return true; } + // Faster if other is a hashset with the same effective equality comparer + if (other is HashSet otherAsSet && EffectiveEqualityComparersAreEqual(this, otherAsSet)) + { + if (otherAsSet.Count == 0) + { + return false; + } + + return otherAsSet.Count < Count ? otherAsSet.OverlapsHashSetWithSameComparer(this) + : OverlapsHashSetWithSameComparer(otherAsSet); + } + foreach (T element in other) { if (Contains(element)) @@ -1260,11 +1368,6 @@ public IEqualityComparer Comparer } } - /// - /// Similar to but surfaces the actual comparer being used to hash entries. - /// - internal IEqualityComparer EffectiveComparer => _comparer ?? EqualityComparer.Default; - /// Ensures that this hash set can hold the specified number of elements without growing. public int EnsureCapacity(int capacity) { @@ -1538,35 +1641,89 @@ private bool AddIfNotPresent(T value, out int location) /// /// If callers are concerned about whether this is a proper subset, they take care of that. /// - internal bool IsSubsetOfHashSetWithSameComparer(HashSet other) + private bool IsSubsetOfHashSetWithSameComparer(HashSet other) { - foreach (T item in this) + Entry[] entries = _entries!; + int count = _count; + if (count <= 0 || count > entries.Length) + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + + if (typeof(T).IsValueType || EffectiveEqualityComparersAreEqual(this, other)) { - if (!other.Contains(item)) + for (int i = 0; i < count; i++) { - return false; + ref Entry entry = ref entries[i]; + if (entry.Next >= -1 && !other.Contains(ref entry)) + { + return false; + } + } + } + else + { + for (int i = 0; i < count; i++) + { + ref Entry entry = ref entries[i]; + if (entry.Next >= -1 && !other.Contains(entry.Value)) + { + return false; + } } } return true; } + private bool OverlapsHashSetWithSameComparer(HashSet other) + { + Debug.Assert(EffectiveEqualityComparersAreEqual(this, other)); + Entry[] entries = _entries!; + int count = _count; + if (count <= 0 || count > entries.Length) + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + + for (int i = 0; i < count; i++) + { + ref Entry entry = ref entries[i]; + if (entry.Next >= -1 && other.Contains(ref entry)) + { + return true; + } + } + + return false; + } + /// /// If other is a hashset that uses same equality comparer, intersect is much faster /// because we can use other's Contains /// private void IntersectWithHashSetWithSameComparer(HashSet other) { - Entry[]? entries = _entries; - for (int i = 0; i < _count; i++) + Entry[] entries = _entries!; + int count = _count; + if (count <= 0 || count > entries.Length) + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + + if (typeof(T).IsValueType || EffectiveEqualityComparersAreEqual(this, other)) { - ref Entry entry = ref entries![i]; - if (entry.Next >= -1) + for (int i = 0; i < count; i++) { - T item = entry.Value; - if (!other.Contains(item)) + ref Entry entry = ref entries[i]; + if (entry.Next >= -1 && !other.Contains(ref entry)) { - Remove(item); + RemoveAt(entries, i); + } + } + } + else + { + for (int i = 0; i < count; i++) + { + ref Entry entry = ref entries![i]; + if (entry.Next >= -1 && !other.Contains(entry.Value)) + { + RemoveAt(entries, i); } } } @@ -1574,7 +1731,7 @@ private void IntersectWithHashSetWithSameComparer(HashSet other) /// /// Iterate over other. If contained in this, mark an element in bit array corresponding to - /// its position in _slots. If anything is unmarked (in bit array), remove it. + /// its position in _entries. If anything is unmarked (in bit array), remove it. /// /// This attempts to allocate on the stack, if below StackAllocThreshold. /// @@ -1587,9 +1744,8 @@ private void IntersectWithEnumerable(IEnumerable other) int originalCount = _count; int intArrayLength = BitHelper.ToIntArrayLength(originalCount); - Span span = stackalloc int[StackAllocThreshold]; BitHelper bitHelper = (uint)intArrayLength <= StackAllocThreshold ? - new BitHelper(span.Slice(0, intArrayLength), clear: true) : + new BitHelper(stackalloc int[StackAllocThreshold].Slice(0, intArrayLength), clear: true) : new BitHelper(new int[intArrayLength], clear: false); // Mark if contains: find index of in slots array and mark corresponding element in bit array. @@ -1602,14 +1758,20 @@ private void IntersectWithEnumerable(IEnumerable other) } } - // If anything unmarked, remove it. Perf can be optimized here if BitHelper had a - // FindFirstUnmarked method. - for (int i = 0; i < originalCount; i++) + // If anything unmarked, remove it. + int i = bitHelper.FindFirstUnmarked(); + if (i >= 0) { - ref Entry entry = ref _entries![i]; - if (entry.Next >= -1 && !bitHelper.IsMarked(i)) + Entry[] entries = _entries!; + if (originalCount <= 0 || originalCount > entries.Length) + ThrowHelper.ThrowInvalidOperationException_ConcurrentOperationsNotSupported(); + + for (; i < originalCount; i++) { - Remove(entry.Value); + if (entries[i].Next >= -1 && bitHelper.IsUnmarked(i)) + { + RemoveAt(entries, i); + } } } } @@ -1655,20 +1817,20 @@ private void SymmetricExceptWithEnumerable(IEnumerable other) int originalCount = _count; int intArrayLength = BitHelper.ToIntArrayLength(originalCount); - Span itemsToRemoveSpan = stackalloc int[StackAllocThreshold / 2]; - BitHelper itemsToRemove = intArrayLength <= StackAllocThreshold / 2 ? - new BitHelper(itemsToRemoveSpan.Slice(0, intArrayLength), clear: true) : + BitHelper itemsToRemove = (uint)intArrayLength <= StackAllocThreshold / 2 ? + new BitHelper(stackalloc int[StackAllocThreshold / 2].Slice(0, intArrayLength), clear: true) : new BitHelper(new int[intArrayLength], clear: false); - Span itemsAddedFromOtherSpan = stackalloc int[StackAllocThreshold / 2]; - BitHelper itemsAddedFromOther = intArrayLength <= StackAllocThreshold / 2 ? - new BitHelper(itemsAddedFromOtherSpan.Slice(0, intArrayLength), clear: true) : + BitHelper itemsAddedFromOther = (uint)intArrayLength <= StackAllocThreshold / 2 ? + new BitHelper(stackalloc int[StackAllocThreshold / 2].Slice(0, intArrayLength), clear: true) : new BitHelper(new int[intArrayLength], clear: false); + int firstMarkedForRemove = originalCount; foreach (T item in other) { - int location; - if (AddIfNotPresent(item, out location)) + bool added = AddIfNotPresent(item, out int tmp); + int location = tmp; + if (added) { // wasn't already present in collection; flag it as something not to remove // *NOTE* if location is out of range, we should ignore. BitHelper will @@ -1680,22 +1842,25 @@ private void SymmetricExceptWithEnumerable(IEnumerable other) else { // already there...if not added from other, mark for remove. - // *NOTE* Even though BitHelper will check that location is in range, we want - // to check here. There's no point in checking items beyond originalCount - // because they could not have been in the original collection - if (location < originalCount && !itemsAddedFromOther.IsMarked(location)) + // *NOTE* BitHelper will check that location is in range. + if (itemsAddedFromOther.IsUnmarked(location)) { itemsToRemove.MarkBit(location); + if (location < firstMarkedForRemove) + { + firstMarkedForRemove = location; + } } } } // if anything marked, remove it - for (int i = 0; i < originalCount; i++) + Entry[] entries = _entries!; + for (int i = firstMarkedForRemove; i < originalCount; i++) { if (itemsToRemove.IsMarked(i)) { - Remove(_entries![i].Value); + RemoveAt(entries, i); } } } @@ -1725,27 +1890,10 @@ private void SymmetricExceptWithEnumerable(IEnumerable other) /// because unfoundCount must be 0. private (int UniqueCount, int UnfoundCount) CheckUniqueAndUnfoundElements(IEnumerable other, bool returnIfUnfound) { - // Need special case in case this has no elements. - if (_count == 0) - { - int numElementsInOther = 0; - foreach (T item in other) - { - numElementsInOther++; - break; // break right away, all we want to know is whether other has 0 or 1 elements - } - - return (UniqueCount: 0, UnfoundCount: numElementsInOther); - } - - Debug.Assert((_buckets != null) && (_count > 0), "_buckets was null but count greater than 0"); + int intArrayLength = BitHelper.ToIntArrayLength(_count); - int originalCount = _count; - int intArrayLength = BitHelper.ToIntArrayLength(originalCount); - - Span span = stackalloc int[StackAllocThreshold]; - BitHelper bitHelper = intArrayLength <= StackAllocThreshold ? - new BitHelper(span.Slice(0, intArrayLength), clear: true) : + BitHelper bitHelper = (uint)intArrayLength <= StackAllocThreshold ? + new BitHelper(stackalloc int[StackAllocThreshold].Slice(0, intArrayLength), clear: true) : new BitHelper(new int[intArrayLength], clear: false); int unfoundCount = 0; // count of items in other not found in this @@ -1756,10 +1904,8 @@ private void SymmetricExceptWithEnumerable(IEnumerable other) int index = FindItemIndex(item); if (index >= 0) { - if (!bitHelper.IsMarked(index)) + if (bitHelper.TryMarkBit(index)) { - // Item hasn't been seen yet. - bitHelper.MarkBit(index); uniqueFoundCount++; } } @@ -1781,13 +1927,15 @@ private void SymmetricExceptWithEnumerable(IEnumerable other) /// speed up if it knows the other item has unique elements. I.e. if they're using /// different equality comparers, then uniqueness assumption between sets break. /// - internal static bool EqualityComparersAreEqual(HashSet set1, HashSet set2) => set1.Comparer.Equals(set2.Comparer); + private static bool EqualityComparersAreEqual(HashSet set1, HashSet set2) + => set1._comparer == set2._comparer || set1.Comparer.Equals(set2.Comparer); /// /// Checks if effective equality comparers are equal. This is used for algorithms that /// require that both collections use identical hashing implementations for their entries. /// - internal static bool EffectiveEqualityComparersAreEqual(HashSet set1, HashSet set2) => set1.EffectiveComparer.Equals(set2.EffectiveComparer); + private static bool EffectiveEqualityComparersAreEqual(HashSet set1, HashSet set2) + => set1._comparer == set2._comparer || set1._comparer?.Equals(set2._comparer) == true; #endregion diff --git a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSetEqualityComparer.cs b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSetEqualityComparer.cs index 981d7644f5d1ee..3eb0a5515332b9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSetEqualityComparer.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Collections/Generic/HashSetEqualityComparer.cs @@ -22,35 +22,7 @@ public bool Equals(HashSet? x, HashSet? y) return false; } - EqualityComparer defaultComparer = EqualityComparer.Default; - - // If both sets use the same comparer, they're equal if they're the same - // size and one is a "subset" of the other. - if (HashSet.EqualityComparersAreEqual(x, y)) - { - return x.Count == y.Count && y.IsSubsetOfHashSetWithSameComparer(x); - } - - // Otherwise, do an O(N^2) match. - foreach (T yi in y) - { - bool found = false; - foreach (T xi in x) - { - if (defaultComparer.Equals(yi, xi)) - { - found = true; - break; - } - } - - if (!found) - { - return false; - } - } - - return true; + return x.SetEquals(y); } public int GetHashCode(HashSet? obj)