diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx
index 4fb24c9ad1538..9fe13b2e7b7c6 100644
--- a/pandas/_libs/internals.pyx
+++ b/pandas/_libs/internals.pyx
@@ -1,6 +1,9 @@
 from collections import defaultdict
+import sys
+import warnings
 
 cimport cython
+from cpython cimport PY_VERSION_HEX
 from cpython.object cimport PyObject
 from cpython.pyport cimport PY_SSIZE_T_MAX
 from cpython.slice cimport PySlice_GetIndicesEx
@@ -20,6 +23,7 @@ from numpy cimport (
 cnp.import_array()
 
 from pandas._libs.algos import ensure_int64
+from pandas.errors import ChainedAssignmentError
 
 from pandas._libs.util cimport (
     is_array,
@@ -996,3 +1000,59 @@ cdef class BlockValuesRefs:
                 return self._has_reference_maybe_locked()
         ELSE:
             return self._has_reference_maybe_locked()
+
+
+cdef extern from "Python.h":
+    """
+    #if PY_VERSION_HEX < 0x030E0000
+    int __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary(PyObject *ref)
+    {
+        return 0;
+    }
+    #else
+    #define __Pyx_PyUnstable_Object_IsUniqueReferencedTemporary \
+        PyUnstable_Object_IsUniqueReferencedTemporary
+    #endif
+    """
+    int PyUnstable_Object_IsUniqueReferencedTemporary\
+        "__Pyx_PyUnstable_Object_IsUniqueReferencedTemporary"(object o) except -1
+
+
+cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
+    if PY_VERSION_HEX >= 0x030E0000:
+        return PyUnstable_Object_IsUniqueReferencedTemporary(obj)
+    else:
+        return sys.getrefcount(obj) <= 1
+
+
+# # Python version compatibility for PyUnstable_Object_IsUniqueReferencedTemporary
+# IF PY_VERSION_HEX >= 0x030E0000:
+#     # Python 3.14+ has PyUnstable_Object_IsUniqueReferencedTemporary
+#     cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
+#         return PyUnstable_Object_IsUniqueReferencedTemporary(obj)
+# ELSE:
+#     # Fallback for older Python versions using sys.getrefcount
+#     cdef inline bint _is_unique_referenced_temporary(object obj) except -1:
+#         # sys.getrefcount includes the reference from getrefcount itself
+#         # So if refcount is 2, it means only one external reference exists
+#         return sys.getrefcount(obj) == 2
+
+
+# @cython.auto_pickle(False)
+cdef class SetitemMixin:
+
+    def __setitem__(self, key, value):
+        cdef bint is_unique = _is_unique_referenced_temporary(self)
+        # print("Refcount self: ", sys.getrefcount(self))
+        # print("Is unique referenced temporary: ", is_unique)
+        if is_unique:
+            warnings.warn(
+                "A value is trying to be set on a copy of a DataFrame or Series "
+                "through chained assignment.",
+                ChainedAssignmentError,
+                stacklevel=1,
+            )
+        self._setitem(key, value)
+
+    def __delitem__(self, key) -> None:
+        self._delitem(key)
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 3c5854602df53..ba4c432ccd266 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -267,6 +267,7 @@ cpdef array_to_datetime(
     str unit_for_numerics=None,
 ):
     """
+    TODO no longer up to date
     Converts a 1D array of date-like values to a numpy array of either:
         1) datetime64[ns] data
         2) datetime.datetime objects, if OutOfBoundsDatetime or TypeError
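The `SetitemMixin` added to `internals.pyx` above funnels every `obj[key] = value` through a check for whether `obj` is a uniquely referenced temporary (via `PyUnstable_Object_IsUniqueReferencedTemporary` on Python 3.14+, with a refcount fallback otherwise) before dispatching to the Python-level `_setitem`. The pure-Python sketch below only illustrates that dispatch pattern, using the `sys.getrefcount(self) <= 3` heuristic this diff removes from `DataFrame.__setitem__`/`Series.__setitem__`; the class and warning names are invented, the threshold is CPython-version dependent, and none of this is the actual Cython implementation.

```python
import sys
import warnings


class ChainedAssignmentWarning(UserWarning):
    """Stand-in for pandas.errors.ChainedAssignmentError in this sketch."""


class SetitemSketch:
    """Warn when __setitem__ runs on what looks like a temporary object.

    The ``<= 3`` threshold mirrors the pure-Python check this diff removes
    from DataFrame/Series; the real mixin prefers
    PyUnstable_Object_IsUniqueReferencedTemporary on Python 3.14+.  Exact
    refcounts are CPython-version dependent, so the constant is illustrative.
    """

    def __init__(self):
        self._data = {}

    def __setitem__(self, key, value):
        if sys.getrefcount(self) <= 3:
            warnings.warn(
                "setting a value on a temporary (chained assignment?)",
                ChainedAssignmentWarning,
                stacklevel=2,
            )
        self._setitem(key, value)  # dispatch to the subclass-level setter

    def _setitem(self, key, value):
        self._data[key] = value

    def copy(self):
        new = SetitemSketch()
        new._data = dict(self._data)
        return new


obj = SetitemSketch()
obj["a"] = 1           # named reference -> no warning expected
obj.copy()["a"] = 2    # temporary from copy() -> warning expected on CPython
```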
diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
index beb4a69232b27..8247356f25f4d 100644
--- a/pandas/compat/pickle_compat.py
+++ b/pandas/compat/pickle_compat.py
@@ -22,6 +22,7 @@
     PeriodArray,
     TimedeltaArray,
 )
+from pandas.core.generic import NDFrame
 from pandas.core.internals import BlockManager
 
 if TYPE_CHECKING:
@@ -90,6 +91,10 @@ def load_reduce(self) -> None:
                 cls = args[0]
                 stack[-1] = NDArrayBacked.__new__(*args)
                 return
+            elif args and issubclass(args[0], NDFrame):
+                cls = args[0]
+                stack[-1] = cls.__new__(cls)
+                return
             raise
 
     dispatch[pickle.REDUCE[0]] = load_reduce  # type: ignore[assignment]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e48620a854edb..c607ce2360155 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -47,6 +47,7 @@
     properties,
 )
 from pandas._libs.hashtable import duplicated
+from pandas._libs.internals import SetitemMixin
 from pandas._libs.lib import is_range_indexer
 from pandas.compat import PYPY
 from pandas.compat._constants import REF_COUNT
@@ -58,7 +59,6 @@
 )
 from pandas.errors.cow import (
     _chained_assignment_method_msg,
-    _chained_assignment_msg,
 )
 from pandas.util._decorators import (
     Appender,
@@ -511,7 +511,7 @@
 
 
 @set_module("pandas")
-class DataFrame(NDFrame, OpsMixin):
+class DataFrame(SetitemMixin, NDFrame, OpsMixin):
     """
     Two-dimensional, size-mutable, potentially heterogeneous tabular data.
 
@@ -660,6 +660,11 @@ class DataFrame(NDFrame, OpsMixin):
     # and ExtensionArray. Should NOT be overridden by subclasses.
     __pandas_priority__ = 4000
 
+    # override those to avoid inheriting from SetitemMixin (cython generates
+    # them by default)
+    __reduce__ = object.__reduce__
+    __setstate__ = NDFrame.__setstate__
+
     @property
     def _constructor(self) -> type[DataFrame]:
         return DataFrame
@@ -4212,7 +4217,7 @@ def isetitem(self, loc, value) -> None:
         arraylike, refs = self._sanitize_column(value)
         self._iset_item_mgr(loc, arraylike, inplace=False, refs=refs)
 
-    def __setitem__(self, key, value) -> None:
+    def _setitem(self, key, value) -> None:
         """
         Set item(s) in DataFrame by key.
 
@@ -4296,11 +4301,11 @@ def __setitem__(self, key, value) -> None:
         z  3  50   # Values for 'a' and 'b' are completely ignored!
         """
-        if not PYPY:
-            if sys.getrefcount(self) <= 3:
-                warnings.warn(
-                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
-                )
+        # if not PYPY:
+        #     if sys.getrefcount(self) <= 3:
+        #         warnings.warn(
+        #             _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
+        #         )
 
         key = com.apply_if_callable(key, self)
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 2ae28266266f6..b562292a0f30a 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -4257,8 +4257,9 @@ def _slice(self, slobj: slice, axis: AxisInt = 0) -> Self:
         result = result.__finalize__(self)
         return result
 
+    # __delitem__ is implemented in SetitemMixin and dispatches to this method
     @final
-    def __delitem__(self, key) -> None:
+    def _delitem(self, key) -> None:
         """
         Delete item
         """
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 42dd8adbead09..ec25c5fa73429 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -2581,6 +2581,12 @@ def __getitem__(self, key):
         return super().__getitem__(key)
 
     def __setitem__(self, key, value) -> None:
+        if not PYPY:
+            if sys.getrefcount(self.obj) <= 2:
+                warnings.warn(
+                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
+                )
+
         if self.ndim == 2 and not self._axes_are_unique:
             # GH#33041 fall back to .loc
             if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
@@ -2605,6 +2611,15 @@ def _convert_key(self, key):
                 raise ValueError("iAt based indexing can only have integer indexers")
         return key
 
+    def __setitem__(self, key, value) -> None:
+        if not PYPY:
+            if sys.getrefcount(self.obj) <= 2:
+                warnings.warn(
+                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
+                )
+
+        return super().__setitem__(key, value)
+
 
 def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
     """
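The two `__setitem__` overrides added in `indexing.py` extend the chained-assignment check to the scalar accessors: in a pattern like `df["a"].iat[0] = 0` the intermediate Series is held only by the indexer's `obj` attribute, so `sys.getrefcount(self.obj)` is at most 2. The snippet below is a usage sketch of the behaviour these checks target (matching the tests added further down in this diff); it assumes a pandas build with this patch applied and Copy-on-Write semantics.

```python
import warnings

import pandas as pd
from pandas.errors import ChainedAssignmentError

df = pd.DataFrame({"a": [1, 2, 3], "b": 1})

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Chained assignment through a scalar accessor: df["a"] is a temporary
    # Series, so under Copy-on-Write the write never reaches df.
    df["a"].iat[0] = 100

print(any(issubclass(w.category, ChainedAssignmentError) for w in caught))  # True
print(df.iat[0, 0])  # still 1 under Copy-on-Write

# The non-chained spelling stays silent and actually modifies df:
df.iat[0, 0] = 100
```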
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 6ae03f2464f76..8ba6509723d90 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -32,6 +32,7 @@
     properties,
     reshape,
 )
+from pandas._libs.internals import SetitemMixin
 from pandas._libs.lib import is_range_indexer
 from pandas.compat import PYPY
 from pandas.compat._constants import REF_COUNT
@@ -43,7 +44,6 @@
 )
 from pandas.errors.cow import (
     _chained_assignment_method_msg,
-    _chained_assignment_msg,
 )
 from pandas.util._decorators import (
     Appender,
@@ -234,7 +234,7 @@
 #     class "NDFrame")
 # definition in base class "NDFrame"
 @set_module("pandas")
-class Series(base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
+class Series(SetitemMixin, base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
     """
     One-dimensional ndarray with axis labels (including time series).
 
@@ -362,6 +362,11 @@ class Series(base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
     )
     _mgr: SingleBlockManager
 
+    # override those to avoid inheriting from SetitemMixin (cython generates
+    # them by default)
+    __reduce__ = object.__reduce__
+    __setstate__ = NDFrame.__setstate__
+
     # ----------------------------------------------------------------------
     # Constructors
 
@@ -1058,12 +1063,12 @@ def _get_value(self, label, takeable: bool = False):
         else:
             return self.iloc[loc]
 
-    def __setitem__(self, key, value) -> None:
-        if not PYPY:
-            if sys.getrefcount(self) <= 3:
-                warnings.warn(
-                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
-                )
+    def _setitem(self, key, value) -> None:
+        # if not PYPY:
+        #     if sys.getrefcount(self) <= 3:
+        #         warnings.warn(
+        #             _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
+        #         )
 
         check_dict_or_set_indexers(key)
         key = com.apply_if_callable(key, self)
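`DataFrame` and `Series` both pin `__reduce__ = object.__reduce__` and `__setstate__ = NDFrame.__setstate__` because a cdef base class such as `SetitemMixin` would otherwise inject Cython-generated pickle hooks (the new `NDFrame` branch in `pickle_compat.load_reduce` above covers pickles produced without that override). The round trip below is not part of the diff and only uses public pandas APIs; it is essentially what those overrides are meant to keep working:

```python
import io
import pickle

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
ser = df["a"].copy()

for obj, assert_equal in (
    (df, pd.testing.assert_frame_equal),
    (ser, pd.testing.assert_series_equal),
):
    buf = io.BytesIO()
    pickle.dump(obj, buf)
    buf.seek(0)
    restored = pickle.load(buf)
    # Both classes should come back equal and type-preserving despite the
    # new cdef base class in their MRO.
    assert_equal(restored, obj)
    assert type(restored) is type(obj)
```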
"b": 1}) + + with tm.raises_chained_assignment_error(): + df["a"].iat[0] = 0 + + +def test_frame_iat_setitem(): + df = DataFrame({"a": [1, 2, 3, 4, 5], "b": 1}) + + with tm.raises_chained_assignment_error(): + df[0:3].iat[0, 0] = 10 diff --git a/pandas/tests/io/test_spss.py b/pandas/tests/io/test_spss.py index 973cb21ac3041..6418bfb1691c6 100644 --- a/pandas/tests/io/test_spss.py +++ b/pandas/tests/io/test_spss.py @@ -11,10 +11,6 @@ pyreadstat = pytest.importorskip("pyreadstat") -# TODO(CoW) - detection of chained assignment in cython -# https://github.com/pandas-dev/pandas/issues/51315 -@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") -@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") @pytest.mark.parametrize("path_klass", [lambda p: p, Path]) def test_spss_labelled_num(path_klass, datapath): # test file from the Haven project (https://haven.tidyverse.org/) @@ -31,8 +27,6 @@ def test_spss_labelled_num(path_klass, datapath): tm.assert_frame_equal(df, expected) -@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") -@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_labelled_num_na(datapath): # test file from the Haven project (https://haven.tidyverse.org/) # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT @@ -48,8 +42,6 @@ def test_spss_labelled_num_na(datapath): tm.assert_frame_equal(df, expected) -@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") -@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_labelled_str(datapath): # test file from the Haven project (https://haven.tidyverse.org/) # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT @@ -65,8 +57,6 @@ def test_spss_labelled_str(datapath): tm.assert_frame_equal(df, expected) -@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") -@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_kwargs(datapath): # test file from the Haven project (https://haven.tidyverse.org/) # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT @@ -81,8 +71,6 @@ def test_spss_kwargs(datapath): tm.assert_frame_equal(df, expected) -@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") -@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_umlauts(datapath): # test file from the Haven project (https://haven.tidyverse.org/) # Licence at LICENSES/HAVEN_LICENSE, LICENSES/HAVEN_MIT @@ -140,8 +128,6 @@ def test_invalid_dtype_backend(): pd.read_spss("test", dtype_backend="numpy") -@pytest.mark.filterwarnings("ignore::pandas.errors.ChainedAssignmentError") -@pytest.mark.filterwarnings("ignore:ChainedAssignmentError:FutureWarning") def test_spss_metadata(datapath): # GH 54264 fname = datapath("io", "data", "spss", "labelled-num.sav")