diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index f7b64b03a52fd..3191c077d3c36 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -687,6 +687,7 @@ Bug fixes
 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`)
+- Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`)
 - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`)
 - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`)
 -
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index e4420d07675ba..d57856115d276 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -575,7 +575,7 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
             # GH 10696/18593/18630
             dtype = self.dtype.update_dtype(dtype)
             self = self.copy() if copy else self
-            result = self._set_dtype(dtype)
+            result = self._set_dtype(dtype, copy=False)
 
         elif isinstance(dtype, ExtensionDtype):
             return super().astype(dtype, copy=copy)
@@ -945,7 +945,7 @@ def _set_categories(self, categories, fastpath: bool = False) -> None:
 
         super().__init__(self._ndarray, new_dtype)
 
-    def _set_dtype(self, dtype: CategoricalDtype) -> Self:
+    def _set_dtype(self, dtype: CategoricalDtype, copy: bool = True) -> Self:
         """
         Internal method for directly updating the CategoricalDtype
 
@@ -958,7 +958,9 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Self:
         We don't do any validation here. It's assumed that the dtype is
         a (valid) instance of `CategoricalDtype`.
         """
-        codes = recode_for_categories(self.codes, self.categories, dtype.categories)
+        codes = recode_for_categories(
+            self.codes, self.categories, dtype.categories, copy
+        )
         return type(self)._simple_new(codes, dtype=dtype)
 
     def set_ordered(self, value: bool) -> Self:
diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py
index 00999d491b242..42edb1f511391 100644
--- a/pandas/tests/arrays/categorical/test_astype.py
+++ b/pandas/tests/arrays/categorical/test_astype.py
@@ -130,6 +130,14 @@ def test_astype_category(self, dtype_ordered, ordered):
             expected = cat
             tm.assert_categorical_equal(result, expected)
 
+    def test_astype_category_copy_false_nocopy_codes(self):
+        # GH#62000
+        cat = Categorical([3, 2, 4, 1])
+        new = cat.astype("category", copy=False)
+        assert tm.shares_memory(new.codes, cat.codes)
+        new = cat.astype("category", copy=True)
+        assert not tm.shares_memory(new.codes, cat.codes)
+
     def test_astype_object_datetime_categories(self):
         # GH#40754
         cat = Categorical(to_datetime(["2021-03-27", NaT]))