From 5949f0d01163144b2e3dee0dff11894d879058ce Mon Sep 17 00:00:00 2001
From: Brock
Date: Tue, 5 Aug 2025 09:27:27 -0700
Subject: [PATCH] REF: simplify mask_missing

---
 pandas/core/dtypes/common.py |  8 ++--
 pandas/core/missing.py       | 72 +++++++++++++-----------------------
 2 files changed, 30 insertions(+), 50 deletions(-)

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 68d99937f728c..99415b6fc6ec8 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1168,10 +1168,10 @@ def is_numeric_v_string_like(a: ArrayLike, b) -> bool:
     is_a_array = isinstance(a, np.ndarray)
     is_b_array = isinstance(b, np.ndarray)
 
-    is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b")
-    is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b")
-    is_a_string_array = is_a_array and a.dtype.kind in ("S", "U")
-    is_b_string_array = is_b_array and b.dtype.kind in ("S", "U")
+    is_a_numeric_array = is_a_array and a.dtype.kind in "uifcb"
+    is_b_numeric_array = is_b_array and b.dtype.kind in "uifcb"
+    is_a_string_array = is_a_array and a.dtype.kind in "SU"
+    is_b_string_array = is_b_array and b.dtype.kind in "SU"
 
     is_b_scalar_string_like = not is_b_array and isinstance(b, str)
 
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index 66609fa870f14..b443b22201e37 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -34,7 +34,6 @@
     is_array_like,
     is_bool_dtype,
     is_numeric_dtype,
-    is_numeric_v_string_like,
     is_object_dtype,
     needs_i8_conversion,
 )
@@ -64,75 +63,56 @@ def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
     return value
 
 
-def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
+def mask_missing(arr: ArrayLike, value) -> npt.NDArray[np.bool_]:
     """
     Return a masking array of same size/shape as arr
-    with entries equaling any member of values_to_mask set to True
+    with entries equaling value set to True.
 
     Parameters
     ----------
     arr : ArrayLike
-    values_to_mask: list, tuple, or scalar
+    value : scalar-like
+        Caller has ensured `not is_list_like(value)` and that it can be held
+        by `arr`.
 
     Returns
     -------
     np.ndarray[bool]
     """
-    # When called from Block.replace/replace_list, values_to_mask is a scalar
-    # known to be holdable by arr.
-    # When called from Series._single_replace, values_to_mask is tuple or list
-    dtype, values_to_mask = infer_dtype_from(values_to_mask)
+    dtype, value = infer_dtype_from(value)
 
-    if isinstance(dtype, np.dtype):
-        values_to_mask = np.array(values_to_mask, dtype=dtype)
-    else:
-        cls = dtype.construct_array_type()
-        if not lib.is_list_like(values_to_mask):
-            values_to_mask = [values_to_mask]
-        values_to_mask = cls._from_sequence(values_to_mask, dtype=dtype, copy=False)
-
-    potential_na = False
-    if is_object_dtype(arr.dtype):
-        # pre-compute mask to avoid comparison to NA
-        potential_na = True
-        arr_mask = ~isna(arr)
-
-    na_mask = isna(values_to_mask)
-    nonna = values_to_mask[~na_mask]
+    if isna(value):
+        return isna(arr)
 
     # GH 21977
     mask = np.zeros(arr.shape, dtype=bool)
     if (
         is_numeric_dtype(arr.dtype)
         and not is_bool_dtype(arr.dtype)
-        and is_bool_dtype(nonna.dtype)
+        and lib.is_bool(value)
     ):
+        # e.g. test_replace_ea_float_with_bool, see GH#62048
         pass
     elif (
-        is_bool_dtype(arr.dtype)
-        and is_numeric_dtype(nonna.dtype)
-        and not is_bool_dtype(nonna.dtype)
+        is_bool_dtype(arr.dtype) and is_numeric_dtype(dtype) and not lib.is_bool(value)
     ):
+        # e.g. test_replace_ea_float_with_bool, see GH#62048
         pass
+    elif is_numeric_dtype(arr.dtype) and isinstance(value, str):
+        # GH#29553 prevent numpy deprecation warnings
+        pass
+    elif is_object_dtype(arr.dtype):
+        # pre-compute mask to avoid comparison to NA
+        # e.g. test_replace_na_in_obj_column
+        arr_mask = ~isna(arr)
+        mask[arr_mask] = arr[arr_mask] == value
     else:
-        for x in nonna:
-            if is_numeric_v_string_like(arr, x):
-                # GH#29553 prevent numpy deprecation warnings
-                pass
-            else:
-                if potential_na:
-                    new_mask = np.zeros(arr.shape, dtype=np.bool_)
-                    new_mask[arr_mask] = arr[arr_mask] == x
-                else:
-                    new_mask = arr == x
-
-                    if not isinstance(new_mask, np.ndarray):
-                        # usually BooleanArray
-                        new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
-                mask |= new_mask
-
-    if na_mask.any():
-        mask |= isna(arr)
+        new_mask = arr == value
+
+        if not isinstance(new_mask, np.ndarray):
+            # usually BooleanArray
+            new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
+        mask = new_mask
 
     return mask
 