Skip to content

REF: simplify mask_missing #62049

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged on Aug 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
@@ -1168,10 +1168,10 @@ def is_numeric_v_string_like(a: ArrayLike, b) -> bool:
is_a_array = isinstance(a, np.ndarray)
is_b_array = isinstance(b, np.ndarray)

is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b")
is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b")
is_a_string_array = is_a_array and a.dtype.kind in ("S", "U")
is_b_string_array = is_b_array and b.dtype.kind in ("S", "U")
is_a_numeric_array = is_a_array and a.dtype.kind in "uifcb"
is_b_numeric_array = is_b_array and b.dtype.kind in "uifcb"
is_a_string_array = is_a_array and a.dtype.kind in "SU"
is_b_string_array = is_b_array and b.dtype.kind in "SU"

is_b_scalar_string_like = not is_b_array and isinstance(b, str)

Expand Down
72 changes: 26 additions & 46 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
@@ -34,7 +34,6 @@
is_array_like,
is_bool_dtype,
is_numeric_dtype,
is_numeric_v_string_like,
is_object_dtype,
needs_i8_conversion,
)
@@ -64,75 +63,56 @@ def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
return value


def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
def mask_missing(arr: ArrayLike, value) -> npt.NDArray[np.bool_]:
"""
Return a masking array of same size/shape as arr
with entries equaling any member of values_to_mask set to True
with entries equaling value set to True.

Parameters
----------
arr : ArrayLike
values_to_mask: list, tuple, or scalar
value : scalar-like
Caller has ensured `not is_list_like(value)` and that it can be held
by `arr`.

Returns
-------
np.ndarray[bool]
"""
# When called from Block.replace/replace_list, values_to_mask is a scalar
# known to be holdable by arr.
# When called from Series._single_replace, values_to_mask is tuple or list
dtype, values_to_mask = infer_dtype_from(values_to_mask)
dtype, value = infer_dtype_from(value)

if isinstance(dtype, np.dtype):
values_to_mask = np.array(values_to_mask, dtype=dtype)
else:
cls = dtype.construct_array_type()
if not lib.is_list_like(values_to_mask):
values_to_mask = [values_to_mask]
values_to_mask = cls._from_sequence(values_to_mask, dtype=dtype, copy=False)

potential_na = False
if is_object_dtype(arr.dtype):
# pre-compute mask to avoid comparison to NA
potential_na = True
arr_mask = ~isna(arr)

na_mask = isna(values_to_mask)
nonna = values_to_mask[~na_mask]
if isna(value):
return isna(arr)

# GH 21977
mask = np.zeros(arr.shape, dtype=bool)
if (
is_numeric_dtype(arr.dtype)
and not is_bool_dtype(arr.dtype)
and is_bool_dtype(nonna.dtype)
and lib.is_bool(value)
):
# e.g. test_replace_ea_float_with_bool, see GH#62048
pass
elif (
is_bool_dtype(arr.dtype)
and is_numeric_dtype(nonna.dtype)
and not is_bool_dtype(nonna.dtype)
is_bool_dtype(arr.dtype) and is_numeric_dtype(dtype) and not lib.is_bool(value)
):
# e.g. test_replace_ea_float_with_bool, see GH#62048
pass
elif is_numeric_dtype(arr.dtype) and isinstance(value, str):
# GH#29553 prevent numpy deprecation warnings
pass
elif is_object_dtype(arr.dtype):
# pre-compute mask to avoid comparison to NA
# e.g. test_replace_na_in_obj_column
arr_mask = ~isna(arr)
mask[arr_mask] = arr[arr_mask] == value
else:
for x in nonna:
if is_numeric_v_string_like(arr, x):
# GH#29553 prevent numpy deprecation warnings
pass
else:
if potential_na:
new_mask = np.zeros(arr.shape, dtype=np.bool_)
new_mask[arr_mask] = arr[arr_mask] == x
else:
new_mask = arr == x

if not isinstance(new_mask, np.ndarray):
# usually BooleanArray
new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
mask |= new_mask

if na_mask.any():
mask |= isna(arr)
new_mask = arr == value

if not isinstance(new_mask, np.ndarray):
# usually BooleanArray
new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
mask = new_mask

return mask

Expand Down
Loading