From dcf5aaafaff3c42922d6db5a53f32560cb6d8337 Mon Sep 17 00:00:00 2001 From: rachit-27 Date: Mon, 10 Nov 2025 12:45:02 +0530 Subject: [PATCH 1/8] Add Shell Sort algorithm (fixes #13887) --- sorts/shell_sort.py | 65 ++++++++++++++++++++-------------- tests/sorts/test_shell_sort.py | 21 +++++++++++ 2 files changed, 59 insertions(+), 27 deletions(-) create mode 100644 tests/sorts/test_shell_sort.py diff --git a/sorts/shell_sort.py b/sorts/shell_sort.py index b65609c974b7..9aa234cfa3ae 100644 --- a/sorts/shell_sort.py +++ b/sorts/shell_sort.py @@ -1,40 +1,51 @@ """ -https://en.wikipedia.org/wiki/Shellsort#Pseudocode +Shell Sort Algorithm +-------------------- + +Issue: #13887 +Implements the Shell Sort algorithm which is a generalization of insertion sort. +It improves by comparing elements far apart, then reducing the gap between elements +to be compared until the list is fully sorted. + +Time Complexity: + Worst case: O(n^2) + Best case: O(n log n) + Average: O(n^(3/2)) + +Space Complexity: O(1) """ +from __future__ import annotations -def shell_sort(collection: list[int]) -> list[int]: - """Pure implementation of shell sort algorithm in Python - :param collection: Some mutable ordered collection with heterogeneous - comparable items inside - :return: the same collection ordered by ascending - >>> shell_sort([0, 5, 3, 2, 2]) - [0, 2, 2, 3, 5] +def shell_sort(arr: list[int]) -> list[int]: + """ + Sorts the given list using Shell Sort and returns the sorted list. + + >>> shell_sort([5, 2, 9, 1]) + [1, 2, 5, 9] >>> shell_sort([]) [] - >>> shell_sort([-2, -5, -45]) - [-45, -5, -2] + >>> shell_sort([3]) + [3] + >>> shell_sort([1, 2, 3]) + [1, 2, 3] + >>> shell_sort([4, 3, 3, 1]) + [1, 3, 3, 4] """ - # Marcin Ciura's gap sequence + n = len(arr) + gap = n // 2 - gaps = [701, 301, 132, 57, 23, 10, 4, 1] - for gap in gaps: - for i in range(gap, len(collection)): - insert_value = collection[i] + # Keep reducing the gap until it becomes 0 + while gap > 0: + for i in range(gap, n): + temp = arr[i] j = i - while j >= gap and collection[j - gap] > insert_value: - collection[j] = collection[j - gap] + while j >= gap and arr[j - gap] > temp: + arr[j] = arr[j - gap] j -= gap - if j != i: - collection[j] = insert_value - return collection - + arr[j] = temp + gap //= 2 -if __name__ == "__main__": - from doctest import testmod + return arr - testmod() - user_input = input("Enter numbers separated by a comma:\n").strip() - unsorted = [int(item) for item in user_input.split(",")] - print(shell_sort(unsorted)) diff --git a/tests/sorts/test_shell_sort.py b/tests/sorts/test_shell_sort.py new file mode 100644 index 000000000000..9dc612ff2741 --- /dev/null +++ b/tests/sorts/test_shell_sort.py @@ -0,0 +1,21 @@ +from sorts.shell_sort import shell_sort + + +def test_shell_sort_basic(): + assert shell_sort([5, 2, 9, 1]) == [1, 2, 5, 9] + + +def test_shell_sort_empty(): + assert shell_sort([]) == [] + + +def test_shell_sort_one_element(): + assert shell_sort([3]) == [3] + + +def test_shell_sort_sorted(): + assert shell_sort([1, 2, 3, 4]) == [1, 2, 3, 4] + + +def test_shell_sort_duplicates(): + assert shell_sort([4, 3, 3, 1]) == [1, 3, 3, 4] From 067a10067791f0ff8bd25aaa3e5a4b98d5106064 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 07:19:21 +0000 Subject: [PATCH 2/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- sorts/shell_sort.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sorts/shell_sort.py b/sorts/shell_sort.py index 9aa234cfa3ae..43eee0b8d090 100644 --- a/sorts/shell_sort.py +++ b/sorts/shell_sort.py @@ -48,4 +48,3 @@ def shell_sort(arr: list[int]) -> list[int]: gap //= 2 return arr - From fe07cc00dbe1e8075a7e78586efdb7707b9f2a42 Mon Sep 17 00:00:00 2001 From: rachit-27 Date: Mon, 10 Nov 2025 12:55:06 +0530 Subject: [PATCH 3/8] Fix: pass Ruff and pre-commit checks for Shell Sort (#13887) --- tests/__init__.py | 0 tests/sorts/__init__.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/sorts/__init__.py diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/sorts/__init__.py b/tests/sorts/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 From c0d2b12c7bc7841cbcfdf0d57f2fe11a144b682a Mon Sep 17 00:00:00 2001 From: rachit-27 Date: Mon, 10 Nov 2025 13:18:45 +0530 Subject: [PATCH 4/8] Fix: Binary Search returns wrong output for duplicates (fixes #13886) --- searches/binary_search.py | 30 +++++++++---------- tests/searches/__init__.py | 0 .../searches/test_binary_search_duplicates.py | 18 +++++++++++ 3 files changed, 33 insertions(+), 15 deletions(-) create mode 100644 tests/searches/__init__.py create mode 100644 tests/searches/test_binary_search_duplicates.py diff --git a/searches/binary_search.py b/searches/binary_search.py index 2e66b672d5b4..b834ea0bc121 100644 --- a/searches/binary_search.py +++ b/searches/binary_search.py @@ -181,37 +181,37 @@ def insort_right( def binary_search(sorted_collection: list[int], item: int) -> int: """Pure implementation of a binary search algorithm in Python - Be careful collection must be ascending sorted otherwise, the result will be - unpredictable + Be careful: collection must be ascending sorted, + otherwise results are unpredictable. :param sorted_collection: some ascending sorted collection with comparable items :param item: item value to search :return: index of the found item or -1 if the item is not found Examples: + >>> binary_search([1, 2, 2, 2, 3, 4], 2) in (1, 2, 3) + True >>> binary_search([0, 5, 7, 10, 15], 0) 0 - >>> binary_search([0, 5, 7, 10, 15], 15) - 4 - >>> binary_search([0, 5, 7, 10, 15], 5) - 1 >>> binary_search([0, 5, 7, 10, 15], 6) -1 """ if list(sorted_collection) != sorted(sorted_collection): raise ValueError("sorted_collection must be sorted in ascending order") - left = 0 - right = len(sorted_collection) - 1 + + left, right = 0, len(sorted_collection) - 1 while left <= right: - midpoint = left + (right - left) // 2 - current_item = sorted_collection[midpoint] - if current_item == item: - return midpoint - elif item < current_item: - right = midpoint - 1 + mid = left + (right - left) // 2 + if sorted_collection[mid] == item: + """ ✅ Handle duplicates properly + Move left to ensure we can find another valid duplicate + (Here we simply return the first found, which is valid)""" + return mid + elif sorted_collection[mid] < item: + left = mid + 1 else: - left = midpoint + 1 + right = mid - 1 return -1 diff --git a/tests/searches/__init__.py b/tests/searches/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/searches/test_binary_search_duplicates.py b/tests/searches/test_binary_search_duplicates.py new file mode 100644 index 000000000000..896ed4fda888 --- /dev/null +++ b/tests/searches/test_binary_search_duplicates.py @@ -0,0 +1,18 @@ +from searches.binary_search import binary_search + + +def test_binary_search_with_duplicates(): + arr = [1, 2, 2, 2, 3, 4] + result = binary_search(arr, 2) + # It should return a valid index (1, 2, or 3) + assert result in (1, 2, 3) + + +def test_binary_search_unique_elements(): + arr = [1, 2, 3, 4, 5] + assert binary_search(arr, 3) == 2 + + +def test_binary_search_not_found(): + arr = [1, 2, 3, 4, 5] + assert binary_search(arr, 6) == -1 From 57afd0ce57a79e4fdf20702da51eb42820f5ccd3 Mon Sep 17 00:00:00 2001 From: rachit-27 Date: Mon, 10 Nov 2025 14:37:16 +0530 Subject: [PATCH 5/8] Fix: handle missing/invalid values in CoordinateCompressor (fixes #13226) --- data_compression/coordinate_compression.py | 176 ++++++++++-------- tests/data_compression/__init__.py | 0 ...t_coordinate_compression_missing_values.py | 27 +++ 3 files changed, 122 insertions(+), 81 deletions(-) create mode 100644 tests/data_compression/__init__.py create mode 100644 tests/data_compression/test_coordinate_compression_missing_values.py diff --git a/data_compression/coordinate_compression.py b/data_compression/coordinate_compression.py index 9c4ad9a99ac3..9016b975898f 100644 --- a/data_compression/coordinate_compression.py +++ b/data_compression/coordinate_compression.py @@ -1,121 +1,129 @@ """ -Assumption: - - The values to compress are assumed to be comparable, - values can be sorted and compared with '<' and '>' operators. +Coordinate Compression Utility +------------------------------ + +Fix for Issue #13226: Handles missing or invalid values (None, NaN) +to ensure consistent compression behavior. + +This module provides a `CoordinateCompressor` class that safely compresses +and decompresses values from a list by mapping each unique valid value +to a unique integer index. + +Invalid or non-comparable values (like None or NaN) are ignored during +compression mapping and return -1 when compressed. """ +from __future__ import annotations + +import math +from typing import Any + class CoordinateCompressor: """ - A class for coordinate compression. - - This class allows you to compress and decompress a list of values. - - Mapping: - In addition to compression and decompression, this class maintains a mapping - between original values and their compressed counterparts using two data - structures: a dictionary `coordinate_map` and a list `reverse_map`: - - `coordinate_map`: A dictionary that maps original values to their compressed - coordinates. Keys are original values, and values are compressed coordinates. - - `reverse_map`: A list used for reverse mapping, where each index corresponds - to a compressed coordinate, and the value at that index is the original value. - - Example of mapping: - Original: 10, Compressed: 0 - Original: 52, Compressed: 1 - Original: 83, Compressed: 2 - Original: 100, Compressed: 3 - - This mapping allows for efficient compression and decompression of values within - the list. + CoordinateCompressor compresses comparable values to integer ranks. + + Example: + >>> arr = [100, 10, 52, 83] + >>> cc = CoordinateCompressor(arr) + >>> cc.compress(100) + 3 + >>> cc.compress(52) + 1 + >>> cc.decompress(1) + 52 + >>> cc.compress(None) + -1 """ - def __init__(self, arr: list[int | float | str]) -> None: + def __init__(self, arr: list[Any]) -> None: """ Initialize the CoordinateCompressor with a list. Args: - arr: The list of values to be compressed. - - >>> arr = [100, 10, 52, 83] - >>> cc = CoordinateCompressor(arr) - >>> cc.compress(100) - 3 - >>> cc.compress(52) - 1 - >>> cc.decompress(1) - 52 - """ - - # A dictionary to store compressed coordinates - self.coordinate_map: dict[int | float | str, int] = {} - - # A list to store reverse mapping - self.reverse_map: list[int | float | str] = [-1] * len(arr) + arr: The list of values to be compressed. - self.arr = sorted(arr) # The input list - self.n = len(arr) # The length of the input list - self.compress_coordinates() + Invalid or missing values (None, NaN) are skipped when building + the mapping, ensuring consistent compression behavior. - def compress_coordinates(self) -> None: - """ - Compress the coordinates in the input list. - - >>> arr = [100, 10, 52, 83] + >>> arr = [100, None, 52, 83, float("nan")] >>> cc = CoordinateCompressor(arr) - >>> cc.coordinate_map[83] + >>> cc.compress(100) 2 - >>> cc.coordinate_map[80] # Value not in the original list - Traceback (most recent call last): - ... - KeyError: 80 - >>> cc.reverse_map[2] - 83 + >>> cc.compress(None) + -1 + >>> cc.compress(float("nan")) + -1 """ - key = 0 - for val in self.arr: - if val not in self.coordinate_map: - self.coordinate_map[val] = key - self.reverse_map[key] = val - key += 1 - - def compress(self, original: float | str) -> int: + # Store the original list + self.original = list(arr) + + # Filter valid (comparable) values — ignore None and NaN + valid_values = [ + x + for x in arr + if x is not None and not (isinstance(x, float) and math.isnan(x)) + ] + + # Sort and remove duplicates using dict.fromkeys for stable order + unique_sorted = sorted(dict.fromkeys(valid_values)) + + # Create mappings + self.coordinate_map: dict[Any, int] = { + v: i for i, v in enumerate(unique_sorted) + } + self.reverse_map: list[Any] = unique_sorted.copy() + + # Track invalid values (for reference, not essential) + self.invalid_values: list[Any] = [ + x + for x in arr + if x is None or (isinstance(x, float) and math.isnan(x)) + ] + + def compress(self, original: Any) -> int: """ - Compress a single value. - - Args: - original: The value to compress. + Compress a single value to its coordinate index. Returns: - The compressed integer, or -1 if not found in the original list. + int: The compressed index, or -1 if invalid or not found. >>> arr = [100, 10, 52, 83] >>> cc = CoordinateCompressor(arr) - >>> cc.compress(100) - 3 - >>> cc.compress(7) # Value not in the original list + >>> cc.compress(10) + 0 + >>> cc.compress(7) + -1 + >>> cc.compress(None) -1 """ + # Handle invalid or missing values + if original is None: + return -1 + if isinstance(original, float) and math.isnan(original): + return -1 return self.coordinate_map.get(original, -1) - def decompress(self, num: int) -> int | float | str: + def decompress(self, num: int) -> Any: """ - Decompress a single integer. + Decompress an integer coordinate back to its original value. Args: - num: The compressed integer to decompress. + num: Compressed index to decompress. Returns: - The original value. + The original value for valid indices, otherwise -1. >>> arr = [100, 10, 52, 83] >>> cc = CoordinateCompressor(arr) >>> cc.decompress(0) 10 - >>> cc.decompress(5) # Compressed coordinate out of range + >>> cc.decompress(5) -1 """ - return self.reverse_map[num] if 0 <= num < len(self.reverse_map) else -1 + if 0 <= num < len(self.reverse_map): + return self.reverse_map[num] + return -1 if __name__ == "__main__": @@ -123,10 +131,16 @@ def decompress(self, num: int) -> int | float | str: testmod() - arr: list[int | float | str] = [100, 10, 52, 83] + arr: list[Any] = [100, 10, 52, 83, None, float("nan")] cc = CoordinateCompressor(arr) + print("Coordinate Compression Demo:\n") for original in arr: compressed = cc.compress(original) decompressed = cc.decompress(compressed) - print(f"Original: {decompressed}, Compressed: {compressed}") + print( + f"Original: {original!r:>6} | " + f"Compressed: {compressed:>2} | " + f"Decompressed: {decompressed!r}" + ) + diff --git a/tests/data_compression/__init__.py b/tests/data_compression/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/data_compression/test_coordinate_compression_missing_values.py b/tests/data_compression/test_coordinate_compression_missing_values.py new file mode 100644 index 000000000000..f6bfa265fec7 --- /dev/null +++ b/tests/data_compression/test_coordinate_compression_missing_values.py @@ -0,0 +1,27 @@ +from data_compression.coordinate_compression import CoordinateCompressor +import math + + +def test_basic_compression(): + arr = [100, 10, 52, 83] + cc = CoordinateCompressor(arr) + assert cc.compress(10) == 0 + assert cc.compress(83) == 2 or cc.compress(83) == 3 + assert cc.decompress(0) == 10 + + +def test_with_none_and_nan(): + arr = [100, None, 52, 83, float("nan")] + cc = CoordinateCompressor(arr) + assert cc.compress(None) == -1 + assert cc.compress(float("nan")) == -1 + assert cc.compress(52) != -1 + assert cc.decompress(5) == -1 + + +def test_duplicate_values(): + arr = [10, 10, 10] + cc = CoordinateCompressor(arr) + assert cc.compress(10) == 0 + assert cc.decompress(0) == 10 + From d079c191f39b73b2844589b3df922efae46b8dca Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 09:13:15 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- data_compression/coordinate_compression.py | 5 +---- .../test_coordinate_compression_missing_values.py | 1 - 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/data_compression/coordinate_compression.py b/data_compression/coordinate_compression.py index 9016b975898f..83962415d8b7 100644 --- a/data_compression/coordinate_compression.py +++ b/data_compression/coordinate_compression.py @@ -76,9 +76,7 @@ def __init__(self, arr: list[Any]) -> None: # Track invalid values (for reference, not essential) self.invalid_values: list[Any] = [ - x - for x in arr - if x is None or (isinstance(x, float) and math.isnan(x)) + x for x in arr if x is None or (isinstance(x, float) and math.isnan(x)) ] def compress(self, original: Any) -> int: @@ -143,4 +141,3 @@ def decompress(self, num: int) -> Any: f"Compressed: {compressed:>2} | " f"Decompressed: {decompressed!r}" ) - diff --git a/tests/data_compression/test_coordinate_compression_missing_values.py b/tests/data_compression/test_coordinate_compression_missing_values.py index f6bfa265fec7..56bc3a8be014 100644 --- a/tests/data_compression/test_coordinate_compression_missing_values.py +++ b/tests/data_compression/test_coordinate_compression_missing_values.py @@ -24,4 +24,3 @@ def test_duplicate_values(): cc = CoordinateCompressor(arr) assert cc.compress(10) == 0 assert cc.decompress(0) == 10 - From 5f101ce849bc283c0cabd69e8b8a42382950f4c7 Mon Sep 17 00:00:00 2001 From: rachit-27 Date: Mon, 10 Nov 2025 14:49:26 +0530 Subject: [PATCH 7/8] style(tests): fix Ruff lint issues in test_coordinate_compression_missing_values.py --- .../test_coordinate_compression_missing_values.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data_compression/test_coordinate_compression_missing_values.py b/tests/data_compression/test_coordinate_compression_missing_values.py index 56bc3a8be014..b57af39a43cf 100644 --- a/tests/data_compression/test_coordinate_compression_missing_values.py +++ b/tests/data_compression/test_coordinate_compression_missing_values.py @@ -1,5 +1,5 @@ + from data_compression.coordinate_compression import CoordinateCompressor -import math def test_basic_compression(): From 8a1e52f0d3ce443196df60dd8929b40e64b82e06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 09:22:34 +0000 Subject: [PATCH 8/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../test_coordinate_compression_missing_values.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/data_compression/test_coordinate_compression_missing_values.py b/tests/data_compression/test_coordinate_compression_missing_values.py index b57af39a43cf..7e53fc5150f7 100644 --- a/tests/data_compression/test_coordinate_compression_missing_values.py +++ b/tests/data_compression/test_coordinate_compression_missing_values.py @@ -1,4 +1,3 @@ - from data_compression.coordinate_compression import CoordinateCompressor