Skip to content

Test chained assignment detection for Python 3.14 #62070

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from collections import defaultdict
import sys
import warnings

cimport cython
from cpython cimport PY_VERSION_HEX
from cpython.object cimport PyObject
from cpython.pyport cimport PY_SSIZE_T_MAX
from cpython.slice cimport PySlice_GetIndicesEx
Expand All @@ -20,6 +23,7 @@ from numpy cimport (
cnp.import_array()

from pandas._libs.algos import ensure_int64
from pandas.errors import ChainedAssignmentError

from pandas._libs.util cimport (
is_array,
Expand Down Expand Up @@ -996,3 +1000,59 @@ cdef class BlockValuesRefs:
return self._has_reference_maybe_locked()
ELSE:
return self._has_reference_maybe_locked()


# Bridge to CPython 3.14's PyUnstable_Object_IsUniqueReferencedTemporary.
# The verbatim C block below is emitted as-is into the generated C file: on
# Python < 3.14 it defines a stub that always returns 0 so the extension
# still compiles and loads; on 3.14+ it aliases the real C API function.
cdef extern from "Python.h":
    """
    #if PY_VERSION_HEX < 0x030E0000
    int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref)
    {
        return 0;
    }
    #else
    #define __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary \
        PyUnstable_Object_IsUniqueReferencedTemporary
    #endif
    """
    # Cython-visible name mapped onto the shim defined above; ``except -1``
    # lets an error from the C call propagate as a Python exception.
    int PyUnstable_Object_IsUniqueReferencedTemporary\
        "__Pyx_PyUnstable_Object_IsUniqueReferencedTemporary"(object o) except -1


cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
    # Return True when ``obj`` is only alive as a temporary of the current
    # expression -- the signature of chained assignment.
    if PY_VERSION_HEX >= 0x030E0000:
        # Python 3.14+: ask the interpreter directly via the unstable C API.
        return PyUnstable_Object_IsUniqueReferencedTemporary(obj)
    else:
        # Fallback for older Pythons.  NOTE(review): ``sys.getrefcount``
        # includes the reference held by its own argument, so a result
        # <= 1 appears unreachable here -- this effectively disables
        # detection pre-3.14.  Confirm that is intended (an earlier
        # variant elsewhere in this file compared ``== 2``).
        return sys.getrefcount(obj) <= 1


# # Python version compatibility for PyUnstable_Object_IsUniqueReferencedTemporary
# IF PY_VERSION_HEX >= 0x030E0000:
# # Python 3.14+ has PyUnstable_Object_IsUniqueReferencedTemporary
# cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
# return PyUnstable_Object_IsUniqueReferencedTemporary(obj)
# ELSE:
# # Fallback for older Python versions using sys.getrefcount
# cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
# # sys.getrefcount includes the reference from getrefcount itself
# # So if refcount is 2, it means only one external reference exists
# return sys.getrefcount(obj) == 2


# NOTE(review): auto_pickle was considered here; instead, subclasses
# (DataFrame/Series) override __reduce__/__setstate__ to opt out of the
# cython-generated pickling hooks.
# @cython.auto_pickle(False)
cdef class SetitemMixin:
    """Mixin routing ``__setitem__``/``__delitem__`` through
    chained-assignment detection.

    Subclasses must implement ``_setitem(key, value)`` and
    ``_delitem(key)``.  ``__setitem__`` first checks whether ``self`` is a
    uniquely referenced temporary (e.g. the intermediate object in
    ``df[col][idx] = value``) and, if so, emits ``ChainedAssignmentError``
    as a warning before delegating, since the write would land on a
    throwaway object.
    """

    def __setitem__(self, key, value):
        # A uniquely referenced temporary receiving a setitem is the
        # chained-assignment pattern: nothing else holds a reference, so
        # the modification would be lost.
        cdef bint is_unique = _is_unique_referenced_temporary(self)
        if is_unique:
            warnings.warn(
                "A value is trying to be set on a copy of a DataFrame or Series "
                "through chained assignment.",
                ChainedAssignmentError,
                stacklevel=1,
            )
        self._setitem(key, value)

    def __delitem__(self, key) -> None:
        # No uniqueness check: deleting from a temporary is not the
        # chained-assignment pattern guarded against above.
        self._delitem(key)
1 change: 1 addition & 0 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ cpdef array_to_datetime(
str unit_for_numerics=None,
):
"""
TODO no longer up to date
Converts a 1D array of date-like values to a numpy array of either:
1) datetime64[ns] data
2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
Expand Down
5 changes: 5 additions & 0 deletions pandas/compat/pickle_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
PeriodArray,
TimedeltaArray,
)
from pandas.core.generic import NDFrame
from pandas.core.internals import BlockManager

if TYPE_CHECKING:
Expand Down Expand Up @@ -90,6 +91,10 @@ def load_reduce(self) -> None:
cls = args[0]
stack[-1] = NDArrayBacked.__new__(*args)
return
elif args and issubclass(args[0], NDFrame):
cls = args[0]
stack[-1] = cls.__new__(cls)
return
raise

dispatch[pickle.REDUCE[0]] = load_reduce # type: ignore[assignment]
Expand Down
21 changes: 13 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
properties,
)
from pandas._libs.hashtable import duplicated
from pandas._libs.internals import SetitemMixin
from pandas._libs.lib import is_range_indexer
from pandas.compat import PYPY
from pandas.compat._constants import REF_COUNT
Expand All @@ -58,7 +59,6 @@
)
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_msg,
)
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -511,7 +511,7 @@


@set_module("pandas")
class DataFrame(NDFrame, OpsMixin):
class DataFrame(SetitemMixin, NDFrame, OpsMixin):
"""
Two-dimensional, size-mutable, potentially heterogeneous tabular data.

Expand Down Expand Up @@ -660,6 +660,11 @@ class DataFrame(NDFrame, OpsMixin):
# and ExtensionArray. Should NOT be overridden by subclasses.
__pandas_priority__ = 4000

# override those to avoid inheriting from SetitemMixin (cython generates
# them by default)
__reduce__ = object.__reduce__
__setstate__ = NDFrame.__setstate__

@property
def _constructor(self) -> type[DataFrame]:
return DataFrame
Expand Down Expand Up @@ -4212,7 +4217,7 @@ def isetitem(self, loc, value) -> None:
arraylike, refs = self._sanitize_column(value)
self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs)

def __setitem__(self, key, value) -> None:
def _setitem(self, key, value) -> None:
"""
Set item(s) in DataFrame by key.

Expand Down Expand Up @@ -4296,11 +4301,11 @@ def __setitem__(self, key, value) -> None:
z 3 50
# Values for 'a' and 'b' are completely ignored!
"""
if not PYPY:
if sys.getrefcount(self) <= 3:
warnings.warn(
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
)
# if not PYPY:
# if sys.getrefcount(self) <= 3:
# warnings.warn(
# _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
# )

key = com.apply_if_callable(key, self)

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4257,8 +4257,9 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self:
result = result.__finalize__(self)
return result

# __delitem__ is implemented in SetitemMixin and dispatches to this method
@final
def __delitem__(self, key) -> None:
def _delitem(self, key) -> None:
"""
Delete item
"""
Expand Down
15 changes: 15 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2581,6 +2581,12 @@ def __getitem__(self, key):
return super().__getitem__(key)

def __setitem__(self, key, value) -> None:
if not PYPY:
if sys.getrefcount(self.obj) <= 2:
warnings.warn(
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
)

if self.ndim == 2 and not self._axes_are_unique:
# GH#33041 fall back to .loc
if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
Expand All @@ -2605,6 +2611,15 @@ def _convert_key(self, key):
raise ValueError("iAt based indexing can only have integer indexers")
return key

def __setitem__(self, key, value) -> None:
    # Chained-assignment guard: when the parent object carries no
    # references beyond those created by the chained expression itself,
    # the write targets a temporary, so warn before delegating.
    refcount_is_low = not PYPY and sys.getrefcount(self.obj) <= 2
    if refcount_is_low:
        warnings.warn(
            _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
        )

    return super().__setitem__(key, value)


def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
"""
Expand Down
21 changes: 13 additions & 8 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
properties,
reshape,
)
from pandas._libs.internals import SetitemMixin
from pandas._libs.lib import is_range_indexer
from pandas.compat import PYPY
from pandas.compat._constants import REF_COUNT
Expand All @@ -43,7 +44,6 @@
)
from pandas.errors.cow import (
_chained_assignment_method_msg,
_chained_assignment_msg,
)
from pandas.util._decorators import (
Appender,
Expand Down Expand Up @@ -234,7 +234,7 @@
# class "NDFrame")
# definition in base class "NDFrame"
@set_module("pandas")
class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc]
class Series(SetitemMixin, base.IndexOpsMixin, NDFrame): # type: ignore[misc]
"""
One-dimensional ndarray with axis labels (including time series).

Expand Down Expand Up @@ -362,6 +362,11 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc]
)
_mgr: SingleBlockManager

# override those to avoid inheriting from SetitemMixin (cython generates
# them by default)
__reduce__ = object.__reduce__
__setstate__ = NDFrame.__setstate__

# ----------------------------------------------------------------------
# Constructors

Expand Down Expand Up @@ -1058,12 +1063,12 @@ def _get_value(self, label, takeable: bool = False):
else:
return self.iloc[loc]

def __setitem__(self, key, value) -> None:
if not PYPY:
if sys.getrefcount(self) <= 3:
warnings.warn(
_chained_assignment_msg, ChainedAssignmentError, stacklevel=2
)
def _setitem(self, key, value) -> None:
# if not PYPY:
# if sys.getrefcount(self) <= 3:
# warnings.warn(
# _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
# )

check_dict_or_set_indexers(key)
key = com.apply_if_callable(key, self)
Expand Down
68 changes: 68 additions & 0 deletions pandas/tests/copy_view/test_chained_assignment_deprecation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,71 @@ def test_frame_setitem(indexer):

with tm.raises_chained_assignment_error():
df[0:3][indexer] = 10


@pytest.mark.parametrize(
    "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
)
def test_series_iloc_setitem(indexer):
    # Chained ``df[col].iloc[...] = ...`` must raise/warn as chained
    # assignment for scalar, list, slice and boolean-mask indexers alike.
    # The chained expression is written inline on purpose: binding the
    # intermediate Series to a name would change its refcount and defeat
    # the detection under test.
    df = DataFrame({"a": [1, 2, 3], "b": 1})

    with tm.raises_chained_assignment_error():
        df["a"].iloc[indexer] = 0


@pytest.mark.parametrize(
    "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
)
def test_frame_iloc_setitem(indexer):
    # Chained ``df[0:3].iloc[...] = ...`` on a sliced (temporary) DataFrame
    # must be flagged as chained assignment; the slice is kept inline so
    # the temporary stays uniquely referenced.
    df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        df[0:3].iloc[indexer] = 10


@pytest.mark.parametrize(
    "indexer", [0, [0, 1], slice(0, 2), np.array([True, False, True])]
)
def test_series_loc_setitem(indexer):
    # Same as the iloc variant, via label-based ``.loc``: chained
    # ``df[col].loc[...] = ...`` must be detected for all indexer shapes.
    df = DataFrame({"a": [1, 2, 3], "b": 1})

    with tm.raises_chained_assignment_error():
        df["a"].loc[indexer] = 0


@pytest.mark.parametrize(
    "indexer", [0, [0, 1], (0, "a"), slice(0, 2), np.array([True, False, True])]
)
def test_frame_loc_setitem(indexer):
    # Chained ``df[0:3].loc[...] = ...`` on a sliced DataFrame must be
    # flagged; includes a (row, column) tuple indexer in addition to the
    # shapes used by the iloc variant.
    df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        df[0:3].loc[indexer] = 10


def test_series_at_setitem():
    # Chained scalar write through ``.at`` on a column selection must be
    # detected as chained assignment.
    df = DataFrame({"a": [1, 2, 3], "b": 1})

    with tm.raises_chained_assignment_error():
        df["a"].at[0] = 0


def test_frame_at_setitem():
    # Chained scalar write through ``.at`` on a row slice must be detected
    # as chained assignment.
    df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        df[0:3].at[0, "a"] = 10


def test_series_iat_setitem():
    # Positional counterpart of the ``.at`` test: chained ``.iat`` scalar
    # write on a column selection must be detected.
    df = DataFrame({"a": [1, 2, 3], "b": 1})

    with tm.raises_chained_assignment_error():
        df["a"].iat[0] = 0


def test_frame_iat_setitem():
    # Positional counterpart of the frame ``.at`` test: chained ``.iat``
    # scalar write on a row slice must be detected.
    df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1})

    with tm.raises_chained_assignment_error():
        df[0:3].iat[0, 0] = 10
14 changes: 0 additions & 14 deletions pandas/tests/io/test_spss.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@
pyreadstat = pytest.importorskip("pyreadstat")


# TODO(CoW) - detection of chained assignment in cython
# https://github.com/pandas-dev/pandas/issues/51315
@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
@pytest.mark.parametrize("path_klass", [lambda p: p, Path])
def test_spss_labelled_num(path_klass, datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
Expand All @@ -31,8 +27,6 @@ def test_spss_labelled_num(path_klass, datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_labelled_num_na(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
# Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
Expand All @@ -48,8 +42,6 @@ def test_spss_labelled_num_na(datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_labelled_str(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
# Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
Expand All @@ -65,8 +57,6 @@ def test_spss_labelled_str(datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_kwargs(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
# Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
Expand All @@ -81,8 +71,6 @@ def test_spss_kwargs(datapath):
tm.assert_frame_equal(df, expected)


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_umlauts(datapath):
# test file from the Haven project (https://haven.tidyverse.org/)
# Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT
Expand Down Expand Up @@ -140,8 +128,6 @@ def test_invalid_dtype_backend():
pd.read_spss("test", dtype_backend="numpy")


@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError")
@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning")
def test_spss_metadata(datapath):
# GH 54264
fname = datapath("io", "data", "spss", "labelled-num.sav")
Expand Down
Loading