diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 0fe69f6d1ebc2..8bee4740b3951 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2215,8 +2215,16 @@ def _repr_categories(self) -> list[str]: ) from pandas.io.formats import format as fmt + formatter = None + if self.categories.dtype == "str": + # the extension array formatter defaults to boxed=True in format_array + # override here to boxed=False to be consistent with QUOTE_NONNUMERIC + formatter = cast(ExtensionArray, self.categories._values)._formatter( + boxed=False + ) + format_array = partial( - fmt.format_array, formatter=None, quoting=QUOTE_NONNUMERIC + fmt.format_array, formatter=formatter, quoting=QUOTE_NONNUMERIC ) if len(self.categories) > max_categories: num = max_categories // 2 diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 3a2c489920eb0..7929dfc927034 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -19,16 +19,11 @@ class TestCategoricalReprWithFactor: def test_print(self, using_infer_string): factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True) - if using_infer_string: - expected = [ - "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", - "Categories (3, str): [a < b < c]", - ] - else: - expected = [ - "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", - "Categories (3, object): ['a' < 'b' < 'c']", - ] + dtype = "str" if using_infer_string else "object" + expected = [ + "['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']", + f"Categories (3, {dtype}): ['a' < 'b' < 'c']", + ] expected = "\n".join(expected) actual = repr(factor) assert actual == expected diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index 166e628ae4b3e..260b9bf97fea8 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs import index as libindex from pandas._libs.arrays import NDArrayBacked @@ -196,7 +194,6 @@ def test_unique(self, data, categories, expected_data, ordered): expected = CategoricalIndex(expected_data, dtype=dtype) tm.assert_index_equal(idx.unique(), expected) - @pytest.mark.xfail(using_string_dtype(), reason="repr doesn't roundtrip") def test_repr_roundtrip(self): ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) str(ci) diff --git a/pandas/tests/series/test_formats.py b/pandas/tests/series/test_formats.py index 4f93e7424bfd5..10588c3b744f2 100644 --- a/pandas/tests/series/test_formats.py +++ b/pandas/tests/series/test_formats.py @@ -318,38 +318,27 @@ def test_categorical_repr(self, using_infer_string): assert exp == a.__str__() a = Series(Categorical(["a", "b"] * 25)) + exp = ( + "0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']" + ) if using_infer_string: - exp = ( - "0 a\n1 b\n" - " ..\n" - "48 a\n49 b\n" - "Length: 50, dtype: category\nCategories (2, str): [a, b]" - ) - else: - exp = ( - "0 a\n1 b\n" - " ..\n" - "48 a\n49 b\n" - "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']" - ) + exp = exp.replace("object", "str") with option_context("display.max_rows", 5): assert exp == repr(a) levs = list("abcdefghijklmnopqrstuvwxyz") a = Series(Categorical(["a", "b"], categories=levs, ordered=True)) + exp = ( + "0 a\n1 b\n" + "dtype: category\n" + "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... " + "'w' < 'x' < 'y' < 'z']" + ) if using_infer_string: - exp = ( - "0 a\n1 b\n" - "dtype: category\n" - "Categories (26, str): [a < b < c < d ... w < x < y < z]" - ) - else: - exp = ( - "0 a\n1 b\n" - "dtype: category\n" - "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... " - "'w' < 'x' < 'y' < 'z']" - ) + exp = exp.replace("object", "str") assert exp == a.__str__() def test_categorical_series_repr(self): diff --git a/pandas/tests/util/test_assert_series_equal.py b/pandas/tests/util/test_assert_series_equal.py index 0d56885a1cb84..9cc9fb924ed67 100644 --- a/pandas/tests/util/test_assert_series_equal.py +++ b/pandas/tests/util/test_assert_series_equal.py @@ -215,24 +215,15 @@ def test_series_equal_numeric_values_mismatch(rtol): def test_series_equal_categorical_values_mismatch(rtol, using_infer_string): - if using_infer_string: - msg = """Series are different - -Series values are different \\(66\\.66667 %\\) -\\[index\\]: \\[0, 1, 2\\] -\\[left\\]: \\['a', 'b', 'c'\\] -Categories \\(3, str\\): \\[a, b, c\\] -\\[right\\]: \\['a', 'c', 'b'\\] -Categories \\(3, str\\): \\[a, b, c\\]""" - else: - msg = """Series are different + dtype = "str" if using_infer_string else "object" + msg = f"""Series are different Series values are different \\(66\\.66667 %\\) \\[index\\]: \\[0, 1, 2\\] \\[left\\]: \\['a', 'b', 'c'\\] -Categories \\(3, object\\): \\['a', 'b', 'c'\\] +Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\] \\[right\\]: \\['a', 'c', 'b'\\] -Categories \\(3, object\\): \\['a', 'b', 'c'\\]""" +Categories \\(3, {dtype}\\): \\['a', 'b', 'c'\\]""" s1 = Series(Categorical(["a", "b", "c"])) s2 = Series(Categorical(["a", "c", "b"]))