diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ec5027840dfd5..392c48354256b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -693,7 +693,7 @@ Categorical - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`) - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`) - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`) -- +- Bug in :meth:`array.astype` where casting a pyarrow-backed array to a :class:`CategoricalDtype` with temporal categories (e.g. datetime or timedelta) raised an error or incorrectly converted all values to ``NaT`` (:issue:`62051`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f2a401bd3687a..347c7a6b76d8b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -73,6 +73,7 @@ NDArrayBackedExtensionArray, ravel_compat, ) +from pandas.core.arrays.arrow.array import ArrowExtensionArray from pandas.core.base import ( ExtensionArray, NoNewAttributesMixin, @@ -483,6 +484,18 @@ def __init__( ) else: + if isinstance(values, ArrowExtensionArray): + from pandas.api.types import ( + is_datetime64_any_dtype, + is_timedelta64_dtype, + ) + + cat_dtype = dtype.categories.dtype + if is_datetime64_any_dtype(cat_dtype) or is_timedelta64_dtype( + cat_dtype + ): + values = values.to_numpy() + codes = _get_codes_for_values(values, dtype.categories) if null_mask.any(): diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index 7ed4da69f5a99..4b9906f020c4b 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ 
b/pandas/tests/arrays/categorical/test_astype.py @@ -7,14 +7,17 @@ CategoricalDtype, CategoricalIndex, DatetimeIndex, + Index, Interval, NaT, Period, Timestamp, array, + isna, to_datetime, ) import pandas._testing as tm +from pandas.core.arrays.arrow.array import ArrowExtensionArray class TestAstype: @@ -160,3 +163,20 @@ def test_astype_category_readonly_mask_values(self): result = arr.astype("category") expected = array([0, 1, 2], dtype="Int64").astype("category") tm.assert_extension_array_equal(result, expected) + + def test_arrow_array_astype_to_categorical_dtype_temporal(self): + # GH#62051 + pytest.importorskip("pyarrow") + arr = array( + ["2017-01-01", "2018-01-01", "2019-01-01"], dtype="date32[day][pyarrow]" + ) + cats = Index(["2017-01-01", "2018-01-01", "2019-01-01"], dtype="M8[s]") + dtype = CategoricalDtype(categories=cats, ordered=False) + + assert not all(isna(arr.astype(dtype))) + + arr = ArrowExtensionArray._from_sequence(["1h", "2h", "3h"]) + cats = Index(["1h", "2h", "3h"], dtype="m8[ns]") + dtype = CategoricalDtype(cats, ordered=False) + + assert not all(isna(arr.astype(dtype)))