Skip to content
This repository was archived by the owner on Apr 1, 2026. It is now read-only.

Commit 29cbcc0

Browse files
feat: Support unstable sort operations
1 parent 3ddd7eb commit 29cbcc0

File tree

11 files changed, with 76 additions and 19 deletions

bigframes/core/array_value.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -212,11 +212,17 @@ def filter(self, predicate: ex.Expression):
212212
return arr.drop_columns(filter_ids)
213213

214214
def order_by(
215-
self, by: Sequence[OrderingExpression], is_total_order: bool = False
215+
self,
216+
by: Sequence[OrderingExpression],
217+
is_total_order: bool = False,
218+
stable: bool = True,
216219
) -> ArrayValue:
217220
return ArrayValue(
218221
nodes.OrderByNode(
219-
child=self.node, by=tuple(by), is_total_order=is_total_order
222+
child=self.node,
223+
by=tuple(by),
224+
is_total_order=is_total_order,
225+
stable=stable,
220226
)
221227
)
222228

bigframes/core/blocks.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,9 +395,10 @@ def cols_matching_label(self, partial_label: Label) -> typing.Sequence[str]:
395395
def order_by(
396396
self,
397397
by: typing.Sequence[ordering.OrderingExpression],
398+
stable: bool = True,
398399
) -> Block:
399400
return Block(
400-
self._expr.order_by(by),
401+
self._expr.order_by(by, stable=stable),
401402
index_columns=self.index_columns,
402403
column_labels=self.column_labels,
403404
index_labels=self.index.names,

bigframes/core/indexes/base.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,7 @@ def sort_values(
436436
*,
437437
inplace: bool = False,
438438
ascending: bool = True,
439+
kind: __builtins__.str | None = None,
439440
na_position: __builtins__.str = "last",
440441
) -> Index:
441442
if na_position not in ["first", "last"]:
@@ -448,7 +449,8 @@ def sort_values(
448449
else order.descending_over(column, na_last)
449450
for column in index_columns
450451
]
451-
return Index(self._block.order_by(ordering))
452+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
453+
return Index(self._block.order_by(ordering, stable=is_stable))
452454

453455
def astype(
454456
self,

bigframes/core/nodes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -992,7 +992,8 @@ def remap_refs(
992992
@dataclasses.dataclass(frozen=True, eq=False)
993993
class OrderByNode(UnaryNode):
994994
by: Tuple[OrderingExpression, ...]
995-
# This is an optimization, if true, can discard previous orderings.
995+
stable: bool = False
996+
# This is an optimization: if true, previous orderings can be discarded, even when doing a stable sort.
996997
# The ordering might still be a total ordering even if this is false.
997998
is_total_order: bool = False
998999

bigframes/core/rewrite/order.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@ def pull_up_order_inner(
7171
child_result, child_order = pull_up_order_inner(node.child)
7272
return child_result, child_order.with_reverse()
7373
elif isinstance(node, bigframes.core.nodes.OrderByNode):
74-
if node.is_total_order:
74+
# unstable sorts don't care about previous order, total orders override previous order
75+
if (not node.stable) or node.is_total_order:
7576
new_node = remove_order(node.child)
7677
else:
7778
new_node, child_order = pull_up_order_inner(node.child)

bigframes/dataframe.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2390,6 +2390,7 @@ def sort_index(
23902390
*,
23912391
ascending: bool = ...,
23922392
inplace: Literal[False] = ...,
2393+
kind: str = ...,
23932394
na_position: Literal["first", "last"] = ...,
23942395
) -> DataFrame:
23952396
...
@@ -2400,6 +2401,7 @@ def sort_index(
24002401
*,
24012402
ascending: bool = ...,
24022403
inplace: Literal[True] = ...,
2404+
kind: str = ...,
24032405
na_position: Literal["first", "last"] = ...,
24042406
) -> None:
24052407
...
@@ -2410,6 +2412,7 @@ def sort_index(
24102412
axis: Union[int, str] = 0,
24112413
ascending: bool = True,
24122414
inplace: bool = False,
2415+
kind: str | None = None,
24132416
na_position: Literal["first", "last"] = "last",
24142417
) -> Optional[DataFrame]:
24152418
if utils.get_axis_number(axis) == 0:
@@ -2423,7 +2426,8 @@ def sort_index(
24232426
else order.descending_over(column, na_last)
24242427
for column in index_columns
24252428
]
2426-
block = self._block.order_by(ordering)
2429+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
2430+
block = self._block.order_by(ordering, stable=is_stable)
24272431
else: # axis=1
24282432
_, indexer = self.columns.sort_values(
24292433
return_indexer=True, ascending=ascending, na_position=na_position # type: ignore
@@ -2467,7 +2471,7 @@ def sort_values(
24672471
*,
24682472
inplace: bool = False,
24692473
ascending: bool | typing.Sequence[bool] = True,
2470-
kind: str = "quicksort",
2474+
kind: str | None = None,
24712475
na_position: typing.Literal["first", "last"] = "last",
24722476
) -> Optional[DataFrame]:
24732477
if isinstance(by, (bigframes.series.Series, indexes.Index, DataFrame)):
@@ -2499,7 +2503,8 @@ def sort_values(
24992503
if is_ascending
25002504
else order.descending_over(column_id, na_last)
25012505
)
2502-
block = self._block.order_by(ordering)
2506+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
2507+
block = self._block.order_by(ordering, stable=is_stable)
25032508
if inplace:
25042509
self._set_block(block)
25052510
return None

bigframes/series.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,19 +1801,21 @@ def sort_values(
18011801
axis=0,
18021802
inplace: bool = False,
18031803
ascending=True,
1804-
kind: str = "quicksort",
1804+
kind: str | None = None,
18051805
na_position: typing.Literal["first", "last"] = "last",
18061806
) -> Optional[Series]:
18071807
if axis != 0 and axis != "index":
18081808
raise ValueError(f"No axis named {axis} for object type Series")
18091809
if na_position not in ["first", "last"]:
18101810
raise ValueError("Param na_position must be one of 'first' or 'last'")
1811+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
18111812
block = self._block.order_by(
18121813
[
18131814
order.ascending_over(self._value_column, (na_position == "last"))
18141815
if ascending
18151816
else order.descending_over(self._value_column, (na_position == "last"))
18161817
],
1818+
stable=is_stable,
18171819
)
18181820
if inplace:
18191821
self._set_block(block)
@@ -1823,19 +1825,37 @@ def sort_values(
18231825

18241826
@typing.overload # type: ignore[override]
18251827
def sort_index(
1826-
self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
1828+
self,
1829+
*,
1830+
axis=...,
1831+
inplace: Literal[False] = ...,
1832+
ascending=...,
1833+
kind: str = ...,
1834+
na_position=...,
18271835
) -> Series:
18281836
...
18291837

18301838
@typing.overload
18311839
def sort_index(
1832-
self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=...
1840+
self,
1841+
*,
1842+
axis=0,
1843+
inplace: Literal[True] = ...,
1844+
ascending=...,
1845+
kind: str = ...,
1846+
na_position=...,
18331847
) -> None:
18341848
...
18351849

18361850
@validations.requires_index
18371851
def sort_index(
1838-
self, *, axis=0, inplace: bool = False, ascending=True, na_position="last"
1852+
self,
1853+
*,
1854+
axis=0,
1855+
inplace: bool = False,
1856+
ascending=True,
1857+
kind: str | None = None,
1858+
na_position="last",
18391859
) -> Optional[Series]:
18401860
# TODO(tbergeron): Support level parameter once multi-index introduced.
18411861
if axis != 0 and axis != "index":
@@ -1850,7 +1870,8 @@ def sort_index(
18501870
else order.descending_over(column, na_last)
18511871
for column in block.index_columns
18521872
]
1853-
block = block.order_by(ordering)
1873+
is_stable = (kind or constants.DEFAULT_SORT_KIND) in ["stable", "mergesort"]
1874+
block = block.order_by(ordering, stable=is_stable)
18541875
if inplace:
18551876
self._set_block(block)
18561877
return None

third_party/bigframes_vendored/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,3 +55,5 @@
5555
"_deferred",
5656
]
5757
VALID_WRITE_ENGINES = typing.get_args(WriteEngineType)
58+
59+
DEFAULT_SORT_KIND = "stable"

third_party/bigframes_vendored/pandas/core/frame.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2210,7 +2210,7 @@ def sort_values(
22102210
*,
22112211
inplace: bool = False,
22122212
ascending: bool | Sequence[bool] = True,
2213-
kind: str = "quicksort",
2213+
kind: str | None = None,
22142214
na_position: Literal["first", "last"] = "last",
22152215
):
22162216
"""Sort by the values along row axis.
@@ -2296,7 +2296,7 @@ def sort_values(
22962296
the by.
22972297
inplace (bool, default False):
22982298
If True, perform operation in-place.
2299-
kind (str, default 'quicksort'):
2299+
kind (str, default None):
23002300
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
23012301
'heapsort', 'stable'. Ignored except when determining whether to
23022302
sort stably. 'mergesort', 'stable', or None (the default) will result in a stable reorder.
@@ -2320,6 +2320,7 @@ def sort_index(
23202320
axis: str | int = 0,
23212321
ascending: bool = True,
23222322
inplace: bool = False,
2323+
kind: str | None = None,
23232324
na_position: Literal["first", "last"] = "last",
23242325
):
23252326
"""Sort object by labels (along an axis).
@@ -2332,6 +2333,10 @@ def sort_index(
23322333
Sort ascending vs. descending.
23332334
inplace (bool, default False):
23342335
Whether to modify the DataFrame rather than creating a new one.
2336+
kind (str, default None):
2337+
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
2338+
'heapsort', 'stable'. Ignored except when determining whether to
2339+
sort stably. 'mergesort', 'stable', or None (the default) will result in a stable reorder.
23352340
na_position ({'first', 'last'}, default 'last'):
23362341
Puts NaNs at the beginning if `first`; `last` puts NaNs at the end.
23372342
Not implemented for MultiIndex.

third_party/bigframes_vendored/pandas/core/indexes/base.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -828,7 +828,11 @@ def nunique(self) -> int:
828828
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
829829

830830
def sort_values(
831-
self, *, ascending: bool = True, na_position: str = "last"
831+
self,
832+
*,
833+
ascending: bool = True,
834+
kind: str | None = None,
835+
na_position: str = "last",
832836
) -> Index:
833837
"""
834838
Return a sorted copy of the index.
@@ -851,6 +855,10 @@ def sort_values(
851855
Args:
852856
ascending (bool, default True):
853857
Should the index values be sorted in an ascending order.
858+
kind (str, default None):
859+
Choice of sorting algorithm. Accepts 'quicksort', 'mergesort',
860+
'heapsort', 'stable'. Ignored except when determining whether to
861+
sort stably. 'mergesort', 'stable', or None (the default) will result in a stable reorder.
854862
na_position ({'first', 'last'}, default 'last'):
855863
Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
856864
the end.

0 commit comments

Comments
 (0)