From 58284dd7db64d3a045929fa865e9e1104718d07f Mon Sep 17 00:00:00 2001 From: arthurlw Date: Wed, 6 Aug 2025 10:32:21 +0700 Subject: [PATCH 1/7] Added condition for CategoricalDtype --- pandas/core/arrays/base.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index bfa2309bb023a..178eaba1d447b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -757,6 +757,8 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: >>> arr2.dtype dtype('float64') """ + from pandas.api.types import CategoricalDtype + dtype = pandas_dtype(dtype) if dtype == self.dtype: if not copy: @@ -764,6 +766,11 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: else: return self.copy() + if isinstance(dtype, CategoricalDtype): + from pandas.core.arrays import Categorical + + return Categorical(self.to_numpy(), dtype=dtype) + if isinstance(dtype, ExtensionDtype): cls = dtype.construct_array_type() return cls._from_sequence(self, dtype=dtype, copy=copy) From 1896199d1a3c6ba3c35d58c4398b8098742b5683 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Wed, 6 Aug 2025 10:51:53 +0700 Subject: [PATCH 2/7] Added tests --- pandas/tests/arrays/categorical/test_astype.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index 7ed4da69f5a99..3c662616e3c89 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -7,14 +7,17 @@ CategoricalDtype, CategoricalIndex, DatetimeIndex, + Index, Interval, NaT, Period, Timestamp, array, + isna, to_datetime, ) import pandas._testing as tm +from pandas.core.arrays.arrow.array import ArrowExtensionArray class TestAstype: @@ -160,3 +163,18 @@ def test_astype_category_readonly_mask_values(self): result = arr.astype("category") expected = array([0, 1, 2], dtype="Int64").astype("category") tm.assert_extension_array_equal(result, expected) + + def test_arrow_array_astype_to_categorical_dtype_temporal(self): + arr = array( + ["2017-01-01", "2018-01-01", "2019-01-01"], dtype="date32[day][pyarrow]" + ) + cats = Index(["2017-01-01", "2018-01-01", "2019-01-01"], dtype="M8[s]") + dtype = CategoricalDtype(categories=cats, ordered=False) + + assert not all(isna(arr.astype(dtype))) + + arr = ArrowExtensionArray._from_sequence(["1h", "2h", "3h"]) + cats = Index(["1h", "2h", "3h"], dtype="m8[ns]") + dtype = CategoricalDtype(cats, ordered=False) + + assert not all(isna(arr.astype(dtype))) From 36b84cfea8226ed06ad1ca3f23dd65fc02992163 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Wed, 6 Aug 2025 10:53:47 +0700 Subject: [PATCH 3/7] whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ec5027840dfd5..392c48354256b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -693,7 +693,7 @@ Categorical - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`) - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`) - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`) -- +- Bug in :meth:`array.astype` where casting a pyarrow-backed array to a temporal :class:`CategoricalDtype` (e.g. with datetime or timedelta categories) raised or incorrectly converted values to all ``NaT`` (:issue:`62051`) Datetimelike ^^^^^^^^^^^^ From 2dc06987ccf142ccbb39079cf05fa3305a6d96c7 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Wed, 6 Aug 2025 10:54:37 +0700 Subject: [PATCH 4/7] Added GH ref --- pandas/tests/arrays/categorical/test_astype.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index 3c662616e3c89..12337afba58ea 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -165,6 +165,7 @@ def test_astype_category_readonly_mask_values(self): tm.assert_extension_array_equal(result, expected) def test_arrow_array_astype_to_categorical_dtype_temporal(self): + # GH#62051 arr = array( ["2017-01-01", "2018-01-01", "2019-01-01"], dtype="date32[day][pyarrow]" ) From 5987952f7d0e74f3555ee6c95da8cca95902b4a6 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Wed, 6 Aug 2025 11:26:16 +0700 Subject: [PATCH 5/7] Updated conditions --- pandas/core/arrays/base.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 178eaba1d447b..95f8205e8badc 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -757,7 +757,11 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: >>> arr2.dtype dtype('float64') """ - from pandas.api.types import CategoricalDtype + from pandas.api.types import ( + CategoricalDtype, + is_datetime64_any_dtype, + ) + from pandas.core.arrays import Categorical dtype = pandas_dtype(dtype) if dtype == self.dtype: @@ -766,9 +770,11 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: else: return self.copy() - if isinstance(dtype, CategoricalDtype): - from pandas.core.arrays import Categorical - + if ( + isinstance(self, Categorical) + and isinstance(dtype, CategoricalDtype) + and is_datetime64_any_dtype(self.categories) + ): return Categorical(self.to_numpy(), dtype=dtype) if isinstance(dtype, ExtensionDtype): From a5fab10c7b96058621d5cca9468b87d9e0717ccf Mon Sep 17 00:00:00 2001 From: arthurlw Date: Wed, 6 Aug 2025 11:59:59 +0700 Subject: [PATCH 6/7] Updated fix logic --- pandas/core/arrays/base.py | 13 ------------- pandas/core/arrays/categorical.py | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 95f8205e8badc..bfa2309bb023a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -757,12 +757,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: >>> arr2.dtype dtype('float64') """ - from pandas.api.types import ( - CategoricalDtype, - is_datetime64_any_dtype, - ) - from pandas.core.arrays import Categorical - dtype = pandas_dtype(dtype) if dtype == self.dtype: if not copy: @@ -770,13 +764,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: else: return self.copy() - if ( - isinstance(self, Categorical) - and isinstance(dtype, CategoricalDtype) - and is_datetime64_any_dtype(self.categories) - ): - return Categorical(self.to_numpy(), dtype=dtype) - if isinstance(dtype, ExtensionDtype): cls = dtype.construct_array_type() return cls._from_sequence(self, dtype=dtype, copy=copy) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f2a401bd3687a..347c7a6b76d8b 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -73,6 +73,7 @@ NDArrayBackedExtensionArray, ravel_compat, ) +from pandas.core.arrays.arrow.array import ArrowExtensionArray from pandas.core.base import ( ExtensionArray, NoNewAttributesMixin, @@ -483,6 +484,18 @@ def __init__( ) else: + if isinstance(values, ArrowExtensionArray): + from pandas.api.types import ( + is_datetime64_any_dtype, + is_timedelta64_dtype, + ) + + cat_dtype = dtype.categories.dtype + if is_datetime64_any_dtype(cat_dtype) or is_timedelta64_dtype( + cat_dtype + ): + values = values.to_numpy() + codes = _get_codes_for_values(values, dtype.categories) if null_mask.any(): From e6b7c64ce29f10233ebf8783405a2b6d527dd793 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Wed, 6 Aug 2025 12:26:49 +0700 Subject: [PATCH 7/7] importorskip --- pandas/tests/arrays/categorical/test_astype.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/arrays/categorical/test_astype.py b/pandas/tests/arrays/categorical/test_astype.py index 12337afba58ea..4b9906f020c4b 100644 --- a/pandas/tests/arrays/categorical/test_astype.py +++ b/pandas/tests/arrays/categorical/test_astype.py @@ -166,6 +166,7 @@ def test_astype_category_readonly_mask_values(self): def test_arrow_array_astype_to_categorical_dtype_temporal(self): # GH#62051 + pytest.importorskip("pyarrow") arr = array( ["2017-01-01", "2018-01-01", "2019-01-01"], dtype="date32[day][pyarrow]" )