Compat: pyarrow CSV parser wrapper when string dtype is not inferred (using_string_dtype() is False)

pandas-dev · jbrockmendel · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025 · Aug 6, 2025
commit 5369afad7affc8d489c65c08bac61c3423f12185
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -5,6 +5,8 @@
 
 import numpy as np
 
+from pandas._config import using_string_dtype
+
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import (
@@ -13,7 +15,10 @@
 )
 from pandas.util._exceptions import find_stack_level
 
-from pandas.core.dtypes.common import pandas_dtype
+from pandas.core.dtypes.common import (
+    is_string_dtype,
+    pandas_dtype,
+)
 from pandas.core.dtypes.dtypes import (
     BaseMaskedDtype,
 )
@@ -339,13 +344,29 @@ def read(self) -> DataFrame:
                 ser = frame[key]
                 if isinstance(ser.dtype, BaseMaskedDtype):
                     new_dtype[key] = ser.dtype.numpy_dtype
+                    if (
+                        key in old_dtype
+                        and not using_string_dtype()
+                        and is_string_dtype(old_dtype[key])
+                        and not isinstance(old_dtype[key], StringDtype)
+                        and ser.array._hasna
+                    ):
+                        # Cast to make sure we get "NaN" string instead of "NA"
+                        frame[key] = ser.astype(old_dtype[key])
+                        frame.loc[ser.isna(), key] = np.nan
+                        old_dtype[key] = object  # Avoid re-casting
                 elif isinstance(ser.dtype, StringDtype):
                     # We cast here in case the user passed "category" in
                     #  order to get the correct dtype.categories.dtype
                     #  e.g. test_categorical_dtype_utf16
-                    sdt = StringDtype(na_value=np.nan)
+                    if not using_string_dtype():
+                        sdt = np.dtype(object)
+                        frame[key] = ser.astype(sdt)
+                        frame.loc[ser.isna(), key] = np.nan
+                    else:
+                        sdt = StringDtype(na_value=np.nan)
+                        frame[key] = frame[key].astype(sdt)
                     new_dtype[key] = sdt  # type: ignore[assignment]
-                    frame[key] = frame[key].astype(new_dtype[key])
 
             new_dtype.update(old_dtype)
             self.dtype = new_dtype