BUG: groupby.idxmin/idxmax with all NA values should raise (#62026)

rhshadrach · web-flow · commit eb489f26e8db · 2025-08-05T11:55:41.000-07:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -507,7 +507,7 @@ Renamed the following offset aliases (:issue:`57986`):
 
 Other Removals
 ^^^^^^^^^^^^^^
-- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
+- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when a group has all NA values, or when used with ``skipna=False`` and any NA value is encountered (:issue:`10694`, :issue:`57745`)
 - :func:`concat` no longer ignores empty objects when determining output dtypes (:issue:`39122`)
 - :func:`concat` with all-NA entries no longer ignores the dtype of those entries when determining the result dtype (:issue:`40893`)
 - :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -2048,9 +2048,8 @@ def group_idxmin_idxmax(
         group_min_or_max = np.empty_like(out, dtype=values.dtype)
         seen = np.zeros_like(out, dtype=np.uint8)
 
-    # When using transform, we need a valid value for take in the case
-    # a category is not observed; these values will be dropped
-    out[:] = 0
+    # Sentinel for no valid values.
+    out[:] = -1
 
     with nogil(numeric_object_t is not object):
         for i in range(N):
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1404,7 +1404,15 @@ def idxmin(self, skipna: bool = True) -> Series:
         Raises
         ------
         ValueError
-            If the Series is empty or skipna=False and any value is NA.
+            When there are no valid values for a group. Then can happen if:
+
+                * There is an unobserved group and ``observed=False``.
+                * All values for a group are NA.
+                * Some values for a group are NA and ``skipna=False``.
+
+        .. versionchanged:: 3.0.0
+            Previously if all values for a group are NA or some values for a group are
+            NA and ``skipna=False``, this method would return NA. Now it raises instead.
 
         See Also
         --------
@@ -1457,7 +1465,15 @@ def idxmax(self, skipna: bool = True) -> Series:
         Raises
         ------
         ValueError
-            If the Series is empty or skipna=False and any value is NA.
+            When there are no valid values for a group. Then can happen if:
+
+                * There is an unobserved group and ``observed=False``.
+                * All values for a group are NA.
+                * Some values for a group are NA and ``skipna=False``.
+
+        .. versionchanged:: 3.0.0
+            Previously if all values for a group are NA or some values for a group are
+            NA and ``skipna=False``, this method would return NA. Now it raises instead.
 
         See Also
         --------
@@ -2597,7 +2613,15 @@ def idxmax(
         Raises
         ------
         ValueError
-            * If a column is empty or skipna=False and any value is NA.
+            When there are no valid values for a group. Then can happen if:
+
+                * There is an unobserved group and ``observed=False``.
+                * All values for a group are NA.
+                * Some values for a group are NA and ``skipna=False``.
+
+        .. versionchanged:: 3.0.0
+            Previously if all values for a group are NA or some values for a group are
+            NA and ``skipna=False``, this method would return NA. Now it raises instead.
 
         See Also
         --------
@@ -2663,7 +2687,15 @@ def idxmin(
         Raises
         ------
         ValueError
-            * If a column is empty or skipna=False and any value is NA.
+            When there are no valid values for a group. Then can happen if:
+
+                * There is an unobserved group and ``observed=False``.
+                * All values for a group are NA.
+                * Some values for a group are NA and ``skipna=False``.
+
+        .. versionchanged:: 3.0.0
+            Previously if all values for a group are NA or some values for a group are
+            NA and ``skipna=False``, this method would return NA. Now it raises instead.
 
         See Also
         --------
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1784,7 +1784,8 @@ def array_func(values: ArrayLike) -> ArrayLike:
         new_mgr = data.grouped_reduce(array_func)
         res = self._wrap_agged_manager(new_mgr)
         if how in ["idxmin", "idxmax"]:
-            res = self._wrap_idxmax_idxmin(res)
+            # mypy expects how to be Literal["idxmin", "idxmax"].
+            res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"])  # type: ignore[arg-type]
         out = self._wrap_aggregated_output(res)
         return out
 
@@ -5715,10 +5716,17 @@ def _idxmax_idxmin(
         )
         return result
 
-    def _wrap_idxmax_idxmin(self, res: NDFrameT) -> NDFrameT:
+    def _wrap_idxmax_idxmin(
+        self, res: NDFrameT, how: Literal["idxmax", "idxmin"], skipna: bool
+    ) -> NDFrameT:
         index = self.obj.index
         if res.size == 0:
             result = res.astype(index.dtype)
+        elif skipna and res.lt(0).any(axis=None):
+            raise ValueError(
+                f"{type(self).__name__}.{how} with skipna=True encountered all NA "
+                f"values in a group."
+            )
         else:
             if isinstance(index, MultiIndex):
                 index = index.to_flat_index()
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -286,18 +286,22 @@ def __init__(
         self._indexer: npt.NDArray[np.intp] | None = None
 
     def _get_grouper(
-        self, obj: NDFrameT, validate: bool = True
+        self, obj: NDFrameT, validate: bool = True, observed: bool = True
     ) -> tuple[ops.BaseGrouper, NDFrameT]:
         """
         Parameters
         ----------
         obj : Series or DataFrame
+            Object being grouped.
         validate : bool, default True
-            if True, validate the grouper
+            If True, validate the grouper.
+        observed : bool, default True
+            Whether only observed groups should be in the result. Only
+            has an impact when grouping on categorical data.
 
         Returns
         -------
-        a tuple of grouper, obj (possibly sorted)
+        A tuple of grouper, obj (possibly sorted)
         """
         obj, _, _ = self._set_grouper(obj)
         grouper, _, obj = get_grouper(
@@ -307,6 +311,7 @@ def _get_grouper(
             sort=self.sort,
             validate=validate,
             dropna=self.dropna,
+            observed=observed,
         )
 
         return grouper, obj
@@ -787,7 +792,7 @@ def get_grouper(
 
     # a passed-in Grouper, directly convert
     if isinstance(key, Grouper):
-        grouper, obj = key._get_grouper(obj, validate=False)
+        grouper, obj = key._get_grouper(obj, validate=False, observed=observed)
         if key.key is None:
             return grouper, frozenset(), obj
         else:
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -2305,8 +2305,22 @@ def _get_resampler(self, obj: NDFrame) -> Resampler:
         )
 
     def _get_grouper(
-        self, obj: NDFrameT, validate: bool = True
+        self, obj: NDFrameT, validate: bool = True, observed: bool = True
     ) -> tuple[BinGrouper, NDFrameT]:
+        """
+        Parameters
+        ----------
+        obj : Series or DataFrame
+            Object being grouped.
+        validate : bool, default True
+            Unused. Only for compatibility with ``Grouper._get_grouper``.
+        observed : bool, default True
+            Unused. Only for compatibility with ``Grouper._get_grouper``.
+
+        Returns
+        -------
+        A tuple of grouper, obj (possibly sorted)
+        """
         # create the resampler and return our binner
         r = self._get_resampler(obj)
         return r._grouper, cast(NDFrameT, r.obj)
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
@@ -272,7 +272,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
     max_value = np.finfo(float_numpy_dtype).max
     df = DataFrame(
         {
-            "a": Series(np.repeat(range(1, 6), repeats=2), dtype="intp"),
+            "a": Series(np.repeat(range(1, 5), repeats=2), dtype="intp"),
             "b": Series(
                 [
                     np.nan,
@@ -283,8 +283,6 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
                     np.nan,
                     max_value,
                     np.nan,
-                    np.nan,
-                    np.nan,
                 ],
                 dtype=float_numpy_dtype,
             ),
@@ -299,7 +297,7 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
         return
     result = getattr(gb, how)(skipna=skipna)
     expected = DataFrame(
-        {"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp")
+        {"b": [1, 3, 4, 6]}, index=pd.Index(range(1, 5), name="a", dtype="intp")
     )
     tm.assert_frame_equal(result, expected)
 
@@ -1003,8 +1001,6 @@ def test_string_dtype_all_na(
         else:
             expected_dtype = "int64"
         expected_value = 1 if reduction_func == "size" else 0
-    elif reduction_func in ["idxmin", "idxmax"]:
-        expected_dtype, expected_value = "float64", np.nan
     elif not skipna or min_count > 0:
         expected_value = pd.NA
     elif reduction_func == "sum":
@@ -1032,8 +1028,11 @@ def test_string_dtype_all_na(
         with pytest.raises(TypeError, match=msg):
             method(*args, **kwargs)
         return
-    elif reduction_func in ["idxmin", "idxmax"] and not skipna:
-        msg = f"{reduction_func} with skipna=False encountered an NA value."
+    elif reduction_func in ["idxmin", "idxmax"]:
+        if skipna:
+            msg = f"{reduction_func} with skipna=True encountered all NA values"
+        else:
+            msg = f"{reduction_func} with skipna=False encountered an NA value."
         with pytest.raises(ValueError, match=msg):
             method(*args, **kwargs)
         return