Skip to content

Commit 116d3e9

Browse files
authored
REF: simplify mask_missing (#62049)
1 parent 10416dc commit 116d3e9

File tree

2 files changed

+30
-50
lines changed

2 files changed

+30
-50
lines changed

pandas/core/dtypes/common.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1168,10 +1168,10 @@ def is_numeric_v_string_like(a: ArrayLike, b) -> bool:
11681168
is_a_array = isinstance(a, np.ndarray)
11691169
is_b_array = isinstance(b, np.ndarray)
11701170

1171-
is_a_numeric_array = is_a_array and a.dtype.kind in ("u", "i", "f", "c", "b")
1172-
is_b_numeric_array = is_b_array and b.dtype.kind in ("u", "i", "f", "c", "b")
1173-
is_a_string_array = is_a_array and a.dtype.kind in ("S", "U")
1174-
is_b_string_array = is_b_array and b.dtype.kind in ("S", "U")
1171+
is_a_numeric_array = is_a_array and a.dtype.kind in "uifcb"
1172+
is_b_numeric_array = is_b_array and b.dtype.kind in "uifcb"
1173+
is_a_string_array = is_a_array and a.dtype.kind in "SU"
1174+
is_b_string_array = is_b_array and b.dtype.kind in "SU"
11751175

11761176
is_b_scalar_string_like = not is_b_array and isinstance(b, str)
11771177

pandas/core/missing.py

Lines changed: 26 additions & 46 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@
3434
is_array_like,
3535
is_bool_dtype,
3636
is_numeric_dtype,
37-
is_numeric_v_string_like,
3837
is_object_dtype,
3938
needs_i8_conversion,
4039
)
@@ -64,75 +63,56 @@ def check_value_size(value, mask: npt.NDArray[np.bool_], length: int):
6463
return value
6564

6665

67-
def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]:
66+
def mask_missing(arr: ArrayLike, value) -> npt.NDArray[np.bool_]:
6867
"""
6968
Return a masking array of same size/shape as arr
70-
with entries equaling any member of values_to_mask set to True
69+
with entries equaling value set to True.
7170
7271
Parameters
7372
----------
7473
arr : ArrayLike
75-
values_to_mask: list, tuple, or scalar
74+
value : scalar-like
75+
Caller has ensured `not is_list_like(value)` and that it can be held
76+
by `arr`.
7677
7778
Returns
7879
-------
7980
np.ndarray[bool]
8081
"""
81-
# When called from Block.replace/replace_list, values_to_mask is a scalar
82-
# known to be holdable by arr.
83-
# When called from Series._single_replace, values_to_mask is tuple or list
84-
dtype, values_to_mask = infer_dtype_from(values_to_mask)
82+
dtype, value = infer_dtype_from(value)
8583

86-
if isinstance(dtype, np.dtype):
87-
values_to_mask = np.array(values_to_mask, dtype=dtype)
88-
else:
89-
cls = dtype.construct_array_type()
90-
if not lib.is_list_like(values_to_mask):
91-
values_to_mask = [values_to_mask]
92-
values_to_mask = cls._from_sequence(values_to_mask, dtype=dtype, copy=False)
93-
94-
potential_na = False
95-
if is_object_dtype(arr.dtype):
96-
# pre-compute mask to avoid comparison to NA
97-
potential_na = True
98-
arr_mask = ~isna(arr)
99-
100-
na_mask = isna(values_to_mask)
101-
nonna = values_to_mask[~na_mask]
84+
if isna(value):
85+
return isna(arr)
10286

10387
# GH 21977
10488
mask = np.zeros(arr.shape, dtype=bool)
10589
if (
10690
is_numeric_dtype(arr.dtype)
10791
and not is_bool_dtype(arr.dtype)
108-
and is_bool_dtype(nonna.dtype)
92+
and lib.is_bool(value)
10993
):
94+
# e.g. test_replace_ea_float_with_bool, see GH#62048
11095
pass
11196
elif (
112-
is_bool_dtype(arr.dtype)
113-
and is_numeric_dtype(nonna.dtype)
114-
and not is_bool_dtype(nonna.dtype)
97+
is_bool_dtype(arr.dtype) and is_numeric_dtype(dtype) and not lib.is_bool(value)
11598
):
99+
# e.g. test_replace_ea_float_with_bool, see GH#62048
116100
pass
101+
elif is_numeric_dtype(arr.dtype) and isinstance(value, str):
102+
# GH#29553 prevent numpy deprecation warnings
103+
pass
104+
elif is_object_dtype(arr.dtype):
105+
# pre-compute mask to avoid comparison to NA
106+
# e.g. test_replace_na_in_obj_column
107+
arr_mask = ~isna(arr)
108+
mask[arr_mask] = arr[arr_mask] == value
117109
else:
118-
for x in nonna:
119-
if is_numeric_v_string_like(arr, x):
120-
# GH#29553 prevent numpy deprecation warnings
121-
pass
122-
else:
123-
if potential_na:
124-
new_mask = np.zeros(arr.shape, dtype=np.bool_)
125-
new_mask[arr_mask] = arr[arr_mask] == x
126-
else:
127-
new_mask = arr == x
128-
129-
if not isinstance(new_mask, np.ndarray):
130-
# usually BooleanArray
131-
new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
132-
mask |= new_mask
133-
134-
if na_mask.any():
135-
mask |= isna(arr)
110+
new_mask = arr == value
111+
112+
if not isinstance(new_mask, np.ndarray):
113+
# usually BooleanArray
114+
new_mask = new_mask.to_numpy(dtype=bool, na_value=False)
115+
mask = new_mask
136116

137117
return mask
138118

0 commit comments

Comments
 (0)