Skip to content

Commit 2ad2abd

Browse files
authored
REF: make copy keyword in recode_for_categories keyword only (#62019)
1 parent 8475758 commit 2ad2abd

File tree

8 files changed

+21
-18
lines changed

8 files changed

+21
-18
lines changed

pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -803,7 +803,7 @@ cdef class BaseMultiIndexCodesEngine:
803803
int_keys : 1-dimensional array of dtype uint64 or object
804804
Integers representing one combination each
805805
"""
806-
level_codes = list(target._recode_for_new_levels(self.levels))
806+
level_codes = list(target._recode_for_new_levels(self.levels, copy=True))
807807
for i, codes in enumerate(level_codes):
808808
if self.levels[i].hasnans:
809809
na_index = self.levels[i].isna().nonzero()[0][0]

pandas/core/arrays/categorical.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -670,13 +670,15 @@ def _from_inferred_categories(
670670
if known_categories:
671671
# Recode from observation order to dtype.categories order.
672672
categories = dtype.categories
673-
codes = recode_for_categories(inferred_codes, cats, categories)
673+
codes = recode_for_categories(inferred_codes, cats, categories, copy=False)
674674
elif not cats.is_monotonic_increasing:
675675
# Sort categories and recode for unknown categories.
676676
unsorted = cats.copy()
677677
categories = cats.sort_values()
678678

679-
codes = recode_for_categories(inferred_codes, unsorted, categories)
679+
codes = recode_for_categories(
680+
inferred_codes, unsorted, categories, copy=False
681+
)
680682
dtype = CategoricalDtype(categories, ordered=False)
681683
else:
682684
dtype = CategoricalDtype(cats, ordered=False)
@@ -945,7 +947,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:
945947

946948
super().__init__(self._ndarray, new_dtype)
947949

948-
def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
950+
def _set_dtype(self, dtype: CategoricalDtype, *, copy: bool) -> Self:
949951
"""
950952
Internal method for directly updating the CategoricalDtype
951953
@@ -959,7 +961,7 @@ def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
959961
a (valid) instance of `CategoricalDtype`.
960962
"""
961963
codes = recode_for_categories(
962-
self.codes, self.categories, dtype.categories, copy
964+
self.codes, self.categories, dtype.categories, copy=copy
963965
)
964966
return type(self)._simple_new(codes, dtype=dtype)
965967

@@ -1154,7 +1156,7 @@ def set_categories(
11541156
codes = cat._codes
11551157
else:
11561158
codes = recode_for_categories(
1157-
cat.codes, cat.categories, new_dtype.categories
1159+
cat.codes, cat.categories, new_dtype.categories, copy=False
11581160
)
11591161
NDArrayBacked.__init__(cat, codes, new_dtype)
11601162
return cat
@@ -3006,7 +3008,7 @@ def _get_codes_for_values(
30063008

30073009

30083010
def recode_for_categories(
3009-
codes: np.ndarray, old_categories, new_categories, copy: bool = True
3011+
codes: np.ndarray, old_categories, new_categories, *, copy: bool
30103012
) -> np.ndarray:
30113013
"""
30123014
Convert a set of codes to a new set of categories
@@ -3027,7 +3029,7 @@ def recode_for_categories(
30273029
>>> old_cat = pd.Index(["b", "a", "c"])
30283030
>>> new_cat = pd.Index(["a", "b"])
30293031
>>> codes = np.array([0, 1, 1, 2])
3030-
>>> recode_for_categories(codes, old_cat, new_cat)
3032+
>>> recode_for_categories(codes, old_cat, new_cat, copy=True)
30313033
array([ 1, 0, 0, -1], dtype=int8)
30323034
"""
30333035
if len(old_categories) == 0:

pandas/core/dtypes/concat.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,8 @@ def _maybe_unwrap(x):
318318
categories = categories.sort_values()
319319

320320
new_codes = [
321-
recode_for_categories(c.codes, c.categories, categories) for c in to_union
321+
recode_for_categories(c.codes, c.categories, categories, copy=False)
322+
for c in to_union
322323
]
323324
new_codes = np.concatenate(new_codes)
324325
else:

pandas/core/groupby/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def recode_for_groupby(c: Categorical, sort: bool, observed: bool) -> Categorica
5353

5454
# we recode according to the uniques
5555
categories = c.categories.take(take_codes)
56-
codes = recode_for_categories(c.codes, c.categories, categories)
56+
codes = recode_for_categories(c.codes, c.categories, categories, copy=False)
5757

5858
# return a new categorical that maps our new codes
5959
# and categories

pandas/core/indexes/multi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2675,7 +2675,7 @@ def _reorder_ilevels(self, order) -> MultiIndex:
26752675
)
26762676

26772677
def _recode_for_new_levels(
2678-
self, new_levels, copy: bool = True
2678+
self, new_levels, *, copy: bool
26792679
) -> Generator[np.ndarray]:
26802680
if len(new_levels) > self.nlevels:
26812681
raise AssertionError(

pandas/tests/arrays/categorical/test_api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,7 @@ def test_recode_to_categories(self, codes, old, new, expected):
480480
expected = np.asanyarray(expected, dtype=np.int8)
481481
old = Index(old)
482482
new = Index(new)
483-
result = recode_for_categories(codes, old, new)
483+
result = recode_for_categories(codes, old, new, copy=True)
484484
tm.assert_numpy_array_equal(result, expected)
485485

486486
def test_recode_to_categories_large(self):
@@ -489,5 +489,5 @@ def test_recode_to_categories_large(self):
489489
old = Index(codes)
490490
expected = np.arange(N - 1, -1, -1, dtype=np.int16)
491491
new = Index(expected)
492-
result = recode_for_categories(codes, old, new)
492+
result = recode_for_categories(codes, old, new, copy=True)
493493
tm.assert_numpy_array_equal(result, expected)

pandas/tests/arrays/categorical/test_dtypes.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@ def test_categories_match_up_to_permutation(self):
4949

5050
def test_set_dtype_same(self):
5151
c = Categorical(["a", "b", "c"])
52-
result = c._set_dtype(CategoricalDtype(["a", "b", "c"]))
52+
result = c._set_dtype(CategoricalDtype(["a", "b", "c"]), copy=True)
5353
tm.assert_categorical_equal(result, c)
5454

5555
def test_set_dtype_new_categories(self):
5656
c = Categorical(["a", "b", "c"])
57-
result = c._set_dtype(CategoricalDtype(list("abcd")))
57+
result = c._set_dtype(CategoricalDtype(list("abcd")), copy=True)
5858
tm.assert_numpy_array_equal(result.codes, c.codes)
5959
tm.assert_index_equal(result.dtype.categories, Index(list("abcd")))
6060

@@ -86,12 +86,12 @@ def test_set_dtype_new_categories(self):
8686
def test_set_dtype_many(self, values, categories, new_categories, ordered):
8787
c = Categorical(values, categories)
8888
expected = Categorical(values, new_categories, ordered)
89-
result = c._set_dtype(expected.dtype)
89+
result = c._set_dtype(expected.dtype, copy=True)
9090
tm.assert_categorical_equal(result, expected)
9191

9292
def test_set_dtype_no_overlap(self):
9393
c = Categorical(["a", "b", "c"], ["d", "e"])
94-
result = c._set_dtype(CategoricalDtype(["a", "b"]))
94+
result = c._set_dtype(CategoricalDtype(["a", "b"]), copy=True)
9595
expected = Categorical([None, None, None], categories=["a", "b"])
9696
tm.assert_categorical_equal(result, expected)
9797

pandas/tests/arrays/categorical/test_missing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def test_nan_handling(self):
5252

5353
def test_set_dtype_nans(self):
5454
c = Categorical(["a", "b", np.nan])
55-
result = c._set_dtype(CategoricalDtype(["a", "c"]))
55+
result = c._set_dtype(CategoricalDtype(["a", "c"]), copy=True)
5656
tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))
5757

5858
def test_set_item_nan(self):

0 commit comments

Comments
 (0)