Skip to content

Commit 6b417e7

Browse files
committed
performance improved approach
1 parent 6dc52f6 commit 6b417e7

File tree

1 file changed

+26
-18
lines changed

1 file changed

+26
-18
lines changed

pandas/core/algorithms.py

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,6 @@
57 57
is_integer_dtype,
58 58
is_list_like,
59 59
is_object_dtype,
60 -
is_signed_integer_dtype,
61 60
needs_i8_conversion,
62 61
)
63 62
from pandas.core.dtypes.concat import concat_compat
@@ -518,23 +517,10 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]:
518 517
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
519 518
orig_values = list(values)
520 519
values = _ensure_arraylike(orig_values, func_name="isin-targets")
521 -
522 -
if (
523 -
len(values) > 0
524 -
and values.dtype.kind in "iufcb"
525 -
# If the dtypes differ and either side is unsigned integer,
526 -
# prefer object dtype to avoid unsafe upcast to float64 that
527 -
# can lose precision for large 64-bit integers.
528 -
and (not is_dtype_equal(values, comps))
529 -
and (
530 -
(not is_signed_integer_dtype(comps))
531 -
or (not is_signed_integer_dtype(values))
532 -
)
533 -
):
534 -
# GH#46485: Use object to avoid upcast to float64 later
535 -
# Ensure symmetric behavior when mixing signed and unsigned
536 -
# integer dtypes.
537 -
values = construct_1d_object_array_from_listlike(orig_values)
520 +
# Keep values as a numeric ndarray where possible; we handle
521 +
# signed/unsigned integer mixes with a fast-path later (after
522 +
# comps_array extraction) to avoid object-dtype conversions that
523 +
# harm performance for large numeric arrays.
538 524

539 525
elif isinstance(values, ABCMultiIndex):
540 526
# Avoid raising in extract_array
@@ -586,6 +572,28 @@ def f(c, v):
586 572
f = lambda a, b: np.isin(a, b).ravel()
587 573

588 574
else:
575 +
# Fast-path: handle integer-kind mixes without upcasting to float64.
576 +
if (
577 +
values.dtype.kind in "iu"
578 +
and comps_array.dtype.kind in "iu"
579 +
and not is_dtype_equal(values.dtype, comps_array.dtype)
580 +
):
581 +
try:
582 +
if values.size > 0 and comps_array.size > 0:
583 +
signed_negative = False
584 +
if values.dtype.kind == "i":
585 +
signed_negative = values.min() < 0
586 +
if comps_array.dtype.kind == "i":
587 +
signed_negative = signed_negative or (comps_array.min() < 0)
588 +
589 +
if not signed_negative:
590 +
values_u = values.astype("uint64", copy=False)
591 +
comps_u = comps_array.astype("uint64", copy=False)
592 +
return htable.ismember(comps_u, values_u)
593 +
except Exception:
594 +
# fall back to generic path on error
595 +
pass
596 +
589 597
common = np_find_common_type(values.dtype, comps_array.dtype)
590 598
values = values.astype(common, copy=False)
591 599
comps_array = comps_array.astype(common, copy=False)

0 commit comments

Comments (0)