From ecabf811602df606ff32d4f0f84d2698f03ff880 Mon Sep 17 00:00:00 2001 From: Roline Stapny Saldanha Date: Sat, 26 Jul 2025 14:18:45 -0700 Subject: [PATCH 1/4] BUG: Fix Series.reindex losing values when reindexing to MultiIndex --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/series.py | 9 ++++ pandas/tests/series/methods/test_reindex.py | 46 +++++++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a9841c4475822..ac4cec3d84458 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -787,6 +787,7 @@ MultiIndex - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`) - Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`) +- Bug in :meth:`Series.reindex` where reindexing a Series with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. Now correctly preserves values when the source index name matches a target level name (:issue:`60923`) I/O ^^^ diff --git a/pandas/core/series.py b/pandas/core/series.py index 63ef68f202a6e..e327cddfe3f1a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4886,6 +4886,15 @@ def reindex( # type: ignore[override] limit: int | None = None, tolerance=None, ) -> Series: + # Automatically detect matching level when reindexing from Index to MultiIndex. 
+ # This prevents values from being incorrectly set to NaN when the source index + # name matches a level name in the target MultiIndex + if ( + level is None + and isinstance(index, MultiIndex) + and self.index.name in index.names + ): + level = self.index.name return super().reindex( index=index, method=method, diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 442d73cadfe47..9191d14e0884d 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -434,3 +434,49 @@ def test_reindex_expand_nonnano_nat(dtype): np.array([1, getattr(np, dtype)("nat", "s")], dtype=f"{dtype}[s]") ) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "idx,expected_match_level_a", + [ + # Source index has matching name - should match level "a" + (Index([81, 82], name="a"), True), + # Source index has no name - should not match any level + (Index([81, 82]), False), + # Source index name doesn't match any level - should not match + (Index([81, 82], name="x"), False), + ], +) +def test_reindex_multiindex_automatic_level(idx, expected_match_level_a): + """ + Test automatic level detection when reindexing from Index to MultiIndex. + """ + series = Series([26.73, 24.255], index=idx) + target = MultiIndex.from_product( + [[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]], names=["a", "b", "c"] + ) + + result = series.reindex(target) + + if expected_match_level_a: + # Should match behavior of explicit level="a" + expected = series.reindex(target, level="a") + else: + # Should contain all NaN values + expected = Series(np.nan, index=target, dtype=series.dtype) + + tm.assert_series_equal(result, expected) + + +def test_reindex_multiindex_explicit_level_overrides(): + """ + Test that explicit level parameter overrides automatic detection. 
+ """ + series = Series([26.73, 24.255], index=Index([81, 82], name="a")) + target = MultiIndex.from_product( + [[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]], names=["a", "b", "c"] + ) + + result = series.reindex(target, level=0) + expected = series.reindex(target, level="a") + tm.assert_series_equal(result, expected) From fe66e041c028a6a0882a6cb2b813ade76a65d27b Mon Sep 17 00:00:00 2001 From: Roline Stapny Saldanha Date: Tue, 29 Jul 2025 06:51:00 -0700 Subject: [PATCH 2/4] BUG: Fix dataframe.reindex to detect matching level when source has single Index --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/frame.py | 12 +++++ pandas/tests/frame/methods/test_reindex.py | 52 +++++++++++++++++++++ pandas/tests/series/methods/test_reindex.py | 12 ++--- 4 files changed, 71 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 1666e4341ecea..162243aee9066 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -788,6 +788,7 @@ MultiIndex - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`) - Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`) +- Bug in :meth:`DataFrame.reindex` where reindexing a Dataframe with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. Now correctly preserves values when the source index name matches a target level name (:issue:`60923`) - Bug in :meth:`Series.reindex` where reindexing a Series with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. 
Now correctly preserves values when the source index name matches a target level name (:issue:`60923`) I/O diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d657f2124c61f..72ce4bcb16e5f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5355,6 +5355,18 @@ def reindex( limit: int | None = None, tolerance=None, ) -> DataFrame: + # Automatically detect matching level when reindexing from Index to MultiIndex. + # This prevents values from being incorrectly set to NaN when the source index + # name matches a level name in the target MultiIndex. Only applies when source + # is not already a MultiIndex. + if ( + level is None + and index is not None + and isinstance(index, MultiIndex) + and not isinstance(self.index, MultiIndex) + and self.index.name in index.names + ): + level = self.index.name return super().reindex( labels=labels, index=index, diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 37adc31fb0f4d..3f99afd0b37bf 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1258,3 +1258,55 @@ def test_invalid_method(self): msg = "Invalid fill method" with pytest.raises(ValueError, match=msg): df.reindex([1, 0, 2], method="asfreq") + + def test_reindex_index_name_matches_multiindex_level(self): + """ + Test automatic level detection when reindexing from Index to MultiIndex. + When source index name matches a level name in target MultiIndex and level + is not specified, should behave same as if level was explicitly set. 
+ """ + # Create source DataFrame with named Index + df = DataFrame( + {"value": [1, 2], "other": ["A", "B"]}, + index=Index([10, 20], name="a"), + ) + + # Create target MultiIndex with matching level name + target = MultiIndex.from_product( + [[10, 20], ["x", "y"]], + names=["a", "b"], # 'a' matches source index name + ) + + result = df.reindex(index=target) + expected = df.reindex(index=target, level="a") + + tm.assert_frame_equal(result, expected) + + # Verify values are propagated correctly + expected_values = { + (10, "x"): {"value": 1, "other": "A"}, + (10, "y"): {"value": 1, "other": "A"}, + (20, "x"): {"value": 2, "other": "B"}, + (20, "y"): {"value": 2, "other": "B"}, + } + for idx, expected_row in expected_values.items(): + for col, val in expected_row.items(): + assert result.loc[idx, col] == val + + def test_reindex_index_name_no_match_multiindex_level(self): + """ + Test reindexing behavior when source index name doesn't match any level + in target MultiIndex. Should fill with NaN since there's no level match. 
+ """ + df = DataFrame({"value": [1, 2]}, index=Index([10, 20], name="different_name")) + + target = MultiIndex.from_product([[10, 20], ["x", "y"]], names=["a", "b"]) + + result = df.reindex(index=target) + + # Should fill with NaN since no level match + assert result.isna().all().all() + + # Verify shape is correct + assert result.index.equals(target) + assert result.columns.equals(df.columns) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 9191d14e0884d..8abd3e40deed9 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -437,21 +437,21 @@ def test_reindex_expand_nonnano_nat(dtype): @pytest.mark.parametrize( - "idx,expected_match_level_a", + "name, expected_match_level_a", [ # Source index has matching name - should match level "a" - (Index([81, 82], name="a"), True), + ("a", True), # Source index has no name - should not match any level - (Index([81, 82]), False), + (None, False), # Source index name doesn't match any level - should not match - (Index([81, 82], name="x"), False), + ("x", False), ], ) -def test_reindex_multiindex_automatic_level(idx, expected_match_level_a): +def test_reindex_multiindex_automatic_level(name, expected_match_level_a): """ Test automatic level detection when reindexing from Index to MultiIndex. 
""" - series = Series([26.73, 24.255], index=idx) + series = Series([26.73, 24.255], index=Index([81, 82], name=name)) target = MultiIndex.from_product( [[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]], names=["a", "b", "c"] ) From 213e50cbf5bb6f07764a681d34c548e06e6237fd Mon Sep 17 00:00:00 2001 From: Roline Stapny Saldanha Date: Wed, 30 Jul 2025 01:12:21 -0700 Subject: [PATCH 3/4] BUG: Fix DataFrame.reindex and Series.reindex loosing values when reindexing single to multiindex --- doc/source/whatsnew/v3.0.0.rst | 3 +- pandas/core/generic.py | 12 +++++++ pandas/core/series.py | 9 ----- pandas/tests/frame/methods/test_reindex.py | 39 ++++----------------- pandas/tests/series/methods/test_reindex.py | 11 ------ 5 files changed, 19 insertions(+), 55 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 631a21d6570fa..eda8c8886b451 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -788,8 +788,7 @@ MultiIndex - Bug in :class:`DataFrame` arithmetic operations in case of unaligned MultiIndex columns (:issue:`60498`) - Bug in :class:`DataFrame` arithmetic operations with :class:`Series` in case of unaligned MultiIndex (:issue:`61009`) - Bug in :meth:`MultiIndex.from_tuples` causing wrong output with input of type tuples having NaN values (:issue:`60695`, :issue:`60988`) -- Bug in :meth:`DataFrame.reindex` where reindexing a Dataframe with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. Now correctly preserves values when the source index name matches a target level name (:issue:`60923`) -- Bug in :meth:`Series.reindex` where reindexing a Series with a named Index to a MultiIndex would incorrectly set all values to ``NaN``. 
Now correctly preserves values when the source index name matches a target level name (:issue:`60923`) +- Bug in :meth:`DataFrame.reindex` and :meth:`Series.reindex` where reindexing :class:`Index` to a :class:`MultiIndex` would incorrectly set all values to ``NaN`` (:issue:`60923`) I/O ^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cbd853886a0f4..2e921a2a3aa96 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5373,6 +5373,18 @@ def reindex( """ # TODO: Decide if we care about having different examples for different # kinds + + # Automatically detect matching level when reindexing from Index to MultiIndex. + # This prevents values from being incorrectly set to NaN when the source index + # name matches a level name in the target MultiIndex + if ( + level is None + and index is not None + and isinstance(index, MultiIndex) + and not isinstance(self.index, MultiIndex) + and self.index.name in index.names + ): + level = self.index.name self._check_copy_deprecation(copy) if index is not None and columns is not None and labels is not None: diff --git a/pandas/core/series.py b/pandas/core/series.py index e327cddfe3f1a..63ef68f202a6e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4886,15 +4886,6 @@ def reindex( # type: ignore[override] limit: int | None = None, tolerance=None, ) -> Series: - # Automatically detect matching level when reindexing from Index to MultiIndex. 
- # This prevents values from being incorrectly set to NaN when the source index - # name matches a level name in the target MultiIndex - if ( - level is None - and isinstance(index, MultiIndex) - and self.index.name in index.names - ): - level = self.index.name return super().reindex( index=index, method=method, diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index 3f99afd0b37bf..533902af9aa06 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -1260,53 +1260,26 @@ def test_invalid_method(self): df.reindex([1, 0, 2], method="asfreq") def test_reindex_index_name_matches_multiindex_level(self): - """ - Test automatic level detection when reindexing from Index to MultiIndex. - When source index name matches a level name in target MultiIndex and level - is not specified, should behave same as if level was explicitly set. - """ - # Create source DataFrame with named Index df = DataFrame( {"value": [1, 2], "other": ["A", "B"]}, index=Index([10, 20], name="a"), ) - - # Create target MultiIndex with matching level name target = MultiIndex.from_product( [[10, 20], ["x", "y"]], - names=["a", "b"], # 'a' matches source index name + names=["a", "b"], ) result = df.reindex(index=target) expected = df.reindex(index=target, level="a") - tm.assert_frame_equal(result, expected) - # Verify values are propagated correctly - expected_values = { - (10, "x"): {"value": 1, "other": "A"}, - (10, "y"): {"value": 1, "other": "A"}, - (20, "x"): {"value": 2, "other": "B"}, - (20, "y"): {"value": 2, "other": "B"}, - } - for idx, expected_row in expected_values.items(): - for col, val in expected_row.items(): - assert result.loc[idx, col] == val - def test_reindex_index_name_no_match_multiindex_level(self): - """ - Test reindexing behavior when source index name doesn't match any level - in target MultiIndex. Should fill with NaN since there's no level match. 
- """ df = DataFrame({"value": [1, 2]}, index=Index([10, 20], name="different_name")) - target = MultiIndex.from_product([[10, 20], ["x", "y"]], names=["a", "b"]) result = df.reindex(index=target) - - # Should fill with NaN since no level match - assert result.isna().all().all() - - # Verify shape is correct - assert result.index.equals(target) - assert result.columns.equals(df.columns) + expected = DataFrame( + {"value": [np.nan] * 4}, + index=MultiIndex.from_product([[10, 20], ["x", "y"]], names=["a", "b"]), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 8abd3e40deed9..0f0027db3a4f7 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -439,18 +439,12 @@ def test_reindex_expand_nonnano_nat(dtype): @pytest.mark.parametrize( "name, expected_match_level_a", [ - # Source index has matching name - should match level "a" ("a", True), - # Source index has no name - should not match any level (None, False), - # Source index name doesn't match any level - should not match ("x", False), ], ) def test_reindex_multiindex_automatic_level(name, expected_match_level_a): - """ - Test automatic level detection when reindexing from Index to MultiIndex. 
- """ series = Series([26.73, 24.255], index=Index([81, 82], name=name)) target = MultiIndex.from_product( [[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]], names=["a", "b", "c"] @@ -459,19 +453,14 @@ def test_reindex_multiindex_automatic_level(name, expected_match_level_a): result = series.reindex(target) if expected_match_level_a: - # Should match behavior of explicit level="a" expected = series.reindex(target, level="a") else: - # Should contain all NaN values expected = Series(np.nan, index=target, dtype=series.dtype) tm.assert_series_equal(result, expected) def test_reindex_multiindex_explicit_level_overrides(): - """ - Test that explicit level parameter overrides automatic detection. - """ series = Series([26.73, 24.255], index=Index([81, 82], name="a")) target = MultiIndex.from_product( [[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]], names=["a", "b", "c"] From 954f8b107884c8e40bda023ac9f24ab5ab44fcb5 Mon Sep 17 00:00:00 2001 From: Roline Stapny Saldanha Date: Thu, 31 Jul 2025 08:48:06 -0700 Subject: [PATCH 4/4] Automatic level detection for dataframe reindex moved to generic.py and remove non-relevant test --- pandas/core/frame.py | 12 ------------ pandas/tests/series/methods/test_reindex.py | 11 ----------- 2 files changed, 23 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 72ce4bcb16e5f..d657f2124c61f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5355,18 +5355,6 @@ def reindex( limit: int | None = None, tolerance=None, ) -> DataFrame: - # Automatically detect matching level when reindexing from Index to MultiIndex. - # This prevents values from being incorrectly set to NaN when the source index - # name matches a level name in the target MultiIndex. Only applies when source - # is not already a MultiIndex. 
- if ( - level is None - and index is not None - and isinstance(index, MultiIndex) - and not isinstance(self.index, MultiIndex) - and self.index.name in index.names - ): - level = self.index.name return super().reindex( labels=labels, index=index, diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 0f0027db3a4f7..979acec6d5182 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -458,14 +458,3 @@ def test_reindex_multiindex_automatic_level(name, expected_match_level_a): expected = Series(np.nan, index=target, dtype=series.dtype) tm.assert_series_equal(result, expected) - - -def test_reindex_multiindex_explicit_level_overrides(): - series = Series([26.73, 24.255], index=Index([81, 82], name="a")) - target = MultiIndex.from_product( - [[81, 82], [np.nan], ["2018-06-01", "2018-07-01"]], names=["a", "b", "c"] - ) - - result = series.reindex(target, level=0) - expected = series.reindex(target, level="a") - tm.assert_series_equal(result, expected)