Skip to content

API: consistent NaN treatment for pyarrow dtypes #61732

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
31e65e0
BUG: Decimal(NaN) incorrectly allowed in ArrowEA constructor with tim…
jbrockmendel Jul 3, 2025
9dcd8fb
GH ref
jbrockmendel Jul 3, 2025
3fb47c7
BUG: ArrowEA constructor with timestamp type
jbrockmendel Jul 4, 2025
c18ab05
POC: consistent NaN treatment for pyarrow dtypes
jbrockmendel Jun 28, 2025
74a2248
comment
jbrockmendel Jun 28, 2025
9d8fef4
Down to 40 failing tests
jbrockmendel Jul 5, 2025
f47c746
Fix rank, json tests
jbrockmendel Jul 6, 2025
083f705
CLN: remove outdated
jbrockmendel Jul 6, 2025
a340203
Fix where kludge
jbrockmendel Jul 6, 2025
587e53f
update tests
jbrockmendel Jul 6, 2025
734465c
Fix remaining tests
jbrockmendel Jul 6, 2025
d2aeeff
mypy fixup
jbrockmendel Jul 7, 2025
73a95d2
old-numpy compat
jbrockmendel Jul 7, 2025
ce28027
simplify
jbrockmendel Jul 7, 2025
9300ad0
Merge branch 'main' into poc-arrow-nans
jbrockmendel Jul 30, 2025
76bc3d2
Merge branch 'main' into poc-arrow-nans
jbrockmendel Jul 31, 2025
0327507
Better option name, fixture
jbrockmendel Jul 31, 2025
c0bdd67
default True
jbrockmendel Jul 31, 2025
2467f6e
Patch ops
jbrockmendel Jul 31, 2025
6356cc0
mypy fixup
jbrockmendel Jul 31, 2025
09e5bf5
Test for setitem/construction
jbrockmendel Jul 31, 2025
ce36571
update ufunc test
jbrockmendel Jul 31, 2025
5bc2617
Improve rank test skips
jbrockmendel Jul 31, 2025
e0216cb
Merge branch 'main' into poc-arrow-nans
jbrockmendel Aug 1, 2025
f3c608a
Merge branch 'main' into poc-arrow-nans
jbrockmendel Aug 2, 2025
5f38d5e
Merge branch 'main' into poc-arrow-nans
jbrockmendel Aug 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Patch ops
  • Loading branch information
jbrockmendel committed Jul 31, 2025
commit 2467f6e74a51bcaaaf54c99b760b096f2a84018f
47 changes: 46 additions & 1 deletion pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,16 @@
pandas_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCIndex,
ABCSeries,
)
from pandas.core.dtypes.missing import isna

from pandas.core import (
algorithms as algos,
arraylike,
missing,
ops,
roperator,
Expand Down Expand Up @@ -752,6 +758,39 @@ def __array__(

return self.to_numpy(dtype=dtype, copy=copy)

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Handle a NumPy ufunc call that involves this array.

    Dispatch order:

    1. Return ``NotImplemented`` if any input is a Series, Index or
       DataFrame.
    2. Try ``arraylike.maybe_dispatch_ufunc_to_dunder_op``.
    3. If ``out`` was passed, use ``arraylike.dispatch_ufunc_with_out``.
    4. For ``method == "reduce"``, try
       ``arraylike.dispatch_reduction_ufunc``.
    5. For float dtypes (``dtype.kind == "f"``), apply the ufunc to a
       NumPy conversion of this array in which missing values are NaN.
    6. Otherwise fall back to ``arraylike.default_array_ufunc``.
    """
    for operand in inputs:
        if isinstance(operand, (ABCSeries, ABCIndex, ABCDataFrame)):
            return NotImplemented

    dunder_result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dunder_result is not NotImplemented:
        return dunder_result

    if "out" in kwargs:
        return arraylike.dispatch_ufunc_with_out(
            self, ufunc, method, *inputs, **kwargs
        )

    if method == "reduce":
        reduced = arraylike.dispatch_reduction_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )
        if reduced is not NotImplemented:
            return reduced

    if self.dtype.kind != "f":
        return arraylike.default_array_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )

    # e.g. test_log_arrow_backed_missing_value
    # Only ``self`` is swapped for its NumPy form; other inputs pass through.
    unwrapped = [
        self.to_numpy(na_value=np.nan) if arg is self else arg for arg in inputs
    ]
    return getattr(ufunc, method)(*unwrapped, **kwargs)

def __invert__(self) -> Self:
# This is a bit wise op for integer types
if pa.types.is_integer(self._pa_array.type):
Expand Down Expand Up @@ -923,7 +962,13 @@ def _logical_method(self, other, op) -> Self:
return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS)

def _arith_method(self, other, op) -> Self:
    """
    Evaluate the arithmetic operator ``op`` between this array and ``other``.

    The computation is delegated to ``_evaluate_op_method`` with
    ``ARROW_ARITHMETIC_FUNCS``. When ``is_nan_na()`` is true and the result
    has a floating-point dtype (``dtype.kind == "f"``), every NaN in the
    result is replaced by a null of the same pyarrow type.
    """
    result = self._evaluate_op_method(other, op, ARROW_ARITHMETIC_FUNCS)
    if is_nan_na() and result.dtype.kind == "f":
        # Arithmetic can introduce NaN (e.g. ``x - nan``, ``0 / 0``); in
        # NaN-is-NA mode those entries are converted to nulls.
        parr = result._pa_array
        mask = pc.is_nan(parr).to_numpy()
        arr = pc.replace_with_mask(parr, mask, pa.scalar(None, type=parr.type))
        result = type(self)(arr)
    return result

def equals(self, other) -> bool:
if not isinstance(other, ArrowExtensionArray):
Expand Down
8 changes: 0 additions & 8 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2539,14 +2539,6 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
if result is not NotImplemented:
return result

# TODO: putting this here is hacky as heck
if self.dtype == "float64[pyarrow]":
# e.g. test_log_arrow_backed_missing_value
new_inputs = [
x if x is not self else x.to_numpy(na_value=np.nan) for x in inputs
]
return getattr(ufunc, method)(*new_inputs, **kwargs)

return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)

def map(self, mapper, na_action: Literal["ignore"] | None = None):
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -3589,3 +3589,26 @@ def test_timestamp_dtype_matches_to_datetime():
expected = pd.Series([ts], dtype=dtype1).convert_dtypes(dtype_backend="pyarrow")

tm.assert_series_equal(result, expected)


def test_ops_with_nan_is_na(using_nan_is_na):
    """Arithmetic that yields NaN gives NA only when ``using_nan_is_na`` is set."""
    # GH#61732
    ser = pd.Series([-1, 0, 1], dtype="int64[pyarrow]")

    def check_every_element(outcome):
        # Every element is NA in NaN-is-NA mode, none otherwise.
        na_mask = outcome.isna()
        if using_nan_is_na:
            assert na_mask.all()
        else:
            assert not na_mask.any()

    check_every_element(ser - np.nan)
    check_every_element(ser * np.nan)

    # Only position 1 (the 0 / 0 element) is checked for division by zero.
    zero_over_zero_is_na = (ser / 0).isna()[1]
    if using_nan_is_na:
        assert zero_over_zero_is_na
    else:
        assert not zero_over_zero_is_na
2 changes: 1 addition & 1 deletion pandas/tests/series/test_npfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_numpy_argwhere(index):


@td.skip_if_no("pyarrow")
def test_log_arrow_backed_missing_value():
def test_log_arrow_backed_missing_value(using_nan_is_na):
# GH#56285
ser = Series([1, 2, None], dtype="float64[pyarrow]")
result = np.log(ser)
Expand Down