From 98bedc4635718eefe50dfcf109f7c45d5f92f49a Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 5 Aug 2025 17:49:00 -0700
Subject: [PATCH 01/39] BUG: read_csv with engine=pyarrow and numpy-nullable
 dtype

---
 doc/source/whatsnew/v3.0.0.rst                |  1 +
 pandas/io/parsers/arrow_parser_wrapper.py     | 74 +++++++++++++++----
 .../io/parser/dtypes/test_dtypes_basic.py     |  4 -
 pandas/tests/io/parser/test_na_values.py      | 17 ++++-
 4 files changed, 74 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index ec5027840dfd5..0f8e026761db0 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -814,6 +814,7 @@ I/O
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 - Bug in :meth:`read_csv` raising ``TypeError`` when ``nrows`` and ``iterator`` are specified without specifying a ``chunksize``. (:issue:`59079`)
 - Bug in :meth:`read_csv` where the order of the ``na_values`` makes an inconsistency when ``na_values`` is a list non-string values. (:issue:`59303`)
+- Bug in :meth:`read_csv` with ``engine="pyarrow"`` and ``dtype="Int64"`` losing precision (:issue:`56136`)
 - Bug in :meth:`read_excel` raising ``ValueError`` when passing array of boolean values when ``dtype="boolean"``. (:issue:`58159`)
 - Bug in :meth:`read_html` where ``rowspan`` in header row causes incorrect conversion to ``DataFrame``. (:issue:`60210`)
 - Bug in :meth:`read_json` ignoring the given ``dtype`` when ``engine="pyarrow"`` (:issue:`59516`)
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 8cadde1ad6537..e446f7f4fb897 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -3,6 +3,8 @@
 from typing import TYPE_CHECKING
 import warnings
 
+import numpy as np
+
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import (
@@ -12,8 +14,13 @@
 from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.common import pandas_dtype
+from pandas.core.dtypes.dtypes import (
+    BaseMaskedDtype,
+)
 from pandas.core.dtypes.inference import is_integer
 
+from pandas.core.arrays.string_ import StringDtype
+
 from pandas.io._util import arrow_table_to_pandas
 from pandas.io.parsers.base_parser import ParserBase
 
@@ -140,20 +147,7 @@ def handle_warning(invalid_row) -> str:
             "encoding": self.encoding,
         }
 
-    def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
-        """
-        Processes data read in based on kwargs.
-
-        Parameters
-        ----------
-        frame: DataFrame
-            The DataFrame to process.
-
-        Returns
-        -------
-        DataFrame
-            The processed DataFrame.
-        """
+    def _finalize_column_names(self, frame: DataFrame) -> DataFrame:
         num_cols = len(frame.columns)
         multi_index_named = True
         if self.header is None:
@@ -196,6 +190,23 @@ def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
             if self.header is None and not multi_index_named:
                 frame.index.names = [None] * len(frame.index.names)
 
+        return frame
+
+    def _finalize_pandas_output(self, frame: DataFrame) -> DataFrame:
+        """
+        Processes data read in based on kwargs.
+
+        Parameters
+        ----------
+        frame: DataFrame
+            The DataFrame to process.
+
+        Returns
+        -------
+        DataFrame
+            The processed DataFrame.
+        """
+
         if self.dtype is not None:
             # Ignore non-existent columns from dtype mapping
             # like other parsers do
@@ -282,6 +293,14 @@ def read(self) -> DataFrame:
 
             table = table.cast(new_schema)
 
+        workaround = False
+        pass_backend = dtype_backend
+        if self.dtype is not None and dtype_backend != "pyarrow":
+            # We pass dtype_backend="numpy_nullable" and subsequently cast
+            #  to avoid lossy conversion e.g. GH#56136
+            workaround = True
+            pass_backend = "numpy_nullable"
+
         with warnings.catch_warnings():
             warnings.filterwarnings(
                 "ignore",
@@ -289,7 +308,32 @@ def read(self) -> DataFrame:
                 DeprecationWarning,
             )
             frame = arrow_table_to_pandas(
-                table, dtype_backend=dtype_backend, null_to_int64=True
+                table, dtype_backend=pass_backend, null_to_int64=True
             )
 
+        frame = self._finalize_column_names(frame)
+
+        if workaround and dtype_backend != "numpy_nullable":
+            old_dtype = self.dtype
+            if not isinstance(old_dtype, dict):
+                # e.g. test_categorical_dtype_utf16
+                old_dtype = dict.fromkeys(frame.columns, old_dtype)
+
+            # _finalize_pandas_output will call astype, but we need to make
+            #  sure all keys are populated appropriately.
+            new_dtype = {}
+            for key in frame.columns:
+                ser = frame[key]
+                if isinstance(ser.dtype, BaseMaskedDtype):
+                    new_dtype[key] = ser.dtype.numpy_dtype
+                elif isinstance(ser.dtype, StringDtype):
+                    # We cast here in case the user passed "category" in
+                    #  order to get the correct dtype.categories.dtype
+                    #  e.g. test_categorical_dtype_utf16
+                    new_dtype[key] = StringDtype(na_value=np.nan)
+                    frame[key] = frame[key].astype(new_dtype[key])
+
+            new_dtype.update(old_dtype)
+            self.dtype = new_dtype
+
         return self._finalize_pandas_output(frame)
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
index 75b7cf0d42cb8..e4563afc631c5 100644
--- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
+++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -518,9 +518,6 @@ def test_dtype_backend_pyarrow(all_parsers, request):
     tm.assert_frame_equal(result, expected)
 
 
-# pyarrow engine failing:
-# https://github.com/pandas-dev/pandas/issues/56136
-@pytest.mark.usefixtures("pyarrow_xfail")
 def test_ea_int_avoid_overflow(all_parsers):
     # GH#32134
     parser = all_parsers
@@ -594,7 +591,6 @@ def test_string_inference_object_dtype(all_parsers, dtype, using_infer_string):
     tm.assert_frame_equal(result, expected)
 
 
-@xfail_pyarrow
 def test_accurate_parsing_of_large_integers(all_parsers):
     # GH#52505
     data = """SYMBOL,MOMENT,ID,ID_DEAL
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
index 213fa2c01cef4..d60074243a526 100644
--- a/pandas/tests/io/parser/test_na_values.py
+++ b/pandas/tests/io/parser/test_na_values.py
@@ -670,11 +670,14 @@ def test_inf_na_values_with_int_index(all_parsers):
     tm.assert_frame_equal(out, expected)
 
 
-@xfail_pyarrow  # mismatched shape
 @pytest.mark.parametrize("na_filter", [True, False])
-def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
+def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter, request):
     # see gh-20377
     parser = all_parsers
+    if parser.engine == "pyarrow" and na_filter is False:
+        mark = pytest.mark.xfail(reason="mismatched shape")
+        request.applymarker(mark)
+
     data = "a,b,c\n1,,3\n4,5,6"
 
     # na_filter=True --> missing value becomes NaN.
@@ -798,7 +801,15 @@ def test_bool_and_nan_to_int(all_parsers):
 True
 False
 """
-    with pytest.raises(ValueError, match="convert|NoneType"):
+    msg = (
+        "cannot safely convert passed user dtype of int64 for "
+        "<class 'numpy.bool'> dtyped data in column 0 due to NA values"
+    )
+    if parser.engine == "python":
+        msg = "Unable to convert column 0 to type int64"
+    elif parser.engine == "pyarrow":
+        msg = r"cannot convert NA to integer"
+    with pytest.raises(ValueError, match=msg):
         parser.read_csv(StringIO(data), dtype="int")
 
 

From 7aa640d2c30c4a99170110c4b97bd816649147c3 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 5 Aug 2025 18:14:49 -0700
Subject: [PATCH 02/39] mypy fixup, error message compat for 32bit builds

---
 pandas/io/parsers/arrow_parser_wrapper.py | 3 ++-
 pandas/tests/io/parser/test_na_values.py  | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index e446f7f4fb897..75cb16a93c493 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -330,7 +330,8 @@ def read(self) -> DataFrame:
                     # We cast here in case the user passed "category" in
                     #  order to get the correct dtype.categories.dtype
                     #  e.g. test_categorical_dtype_utf16
-                    new_dtype[key] = StringDtype(na_value=np.nan)
+                    sdt = StringDtype(na_value=np.nan)
+                    new_dtype[key] = sdt  # type: ignore[assignment]
                     frame[key] = frame[key].astype(new_dtype[key])
 
             new_dtype.update(old_dtype)
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
index d60074243a526..d0cc92c5a73af 100644
--- a/pandas/tests/io/parser/test_na_values.py
+++ b/pandas/tests/io/parser/test_na_values.py
@@ -802,11 +802,11 @@ def test_bool_and_nan_to_int(all_parsers):
 False
 """
     msg = (
-        "cannot safely convert passed user dtype of int64 for "
+        "cannot safely convert passed user dtype of int(64|32) for "
         "<class 'numpy.bool'> dtyped data in column 0 due to NA values"
     )
     if parser.engine == "python":
-        msg = "Unable to convert column 0 to type int64"
+        msg = "Unable to convert column 0 to type int(64|32)"
     elif parser.engine == "pyarrow":
         msg = r"cannot convert NA to integer"
     with pytest.raises(ValueError, match=msg):

From e5b752ef6580486b8273f921d9a246bc32180bf8 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 5 Aug 2025 19:06:31 -0700
Subject: [PATCH 03/39] minimum version compat

---
 pandas/tests/io/parser/test_na_values.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
index d0cc92c5a73af..5f08f5ef466cf 100644
--- a/pandas/tests/io/parser/test_na_values.py
+++ b/pandas/tests/io/parser/test_na_values.py
@@ -803,7 +803,7 @@ def test_bool_and_nan_to_int(all_parsers):
 """
     msg = (
         "cannot safely convert passed user dtype of int(64|32) for "
-        "<class 'numpy.bool'> dtyped data in column 0 due to NA values"
+        "<class 'numpy.bool_?'> dtyped data in column 0 due to NA values"
     )
     if parser.engine == "python":
         msg = "Unable to convert column 0 to type int(64|32)"

From 323414c504446a30b3aa9a4f6fbdc286273a1a8d Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 5 Aug 2025 20:57:03 -0700
Subject: [PATCH 04/39] not-infer-string compat

---
 pandas/io/parsers/arrow_parser_wrapper.py | 27 ++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 75cb16a93c493..039841747c9a8 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -5,6 +5,8 @@
 
 import numpy as np
 
+from pandas._config import using_string_dtype
+
 from pandas._libs import lib
 from pandas.compat._optional import import_optional_dependency
 from pandas.errors import (
@@ -13,7 +15,10 @@
 )
 from pandas.util._exceptions import find_stack_level
 
-from pandas.core.dtypes.common import pandas_dtype
+from pandas.core.dtypes.common import (
+    is_string_dtype,
+    pandas_dtype,
+)
 from pandas.core.dtypes.dtypes import (
     BaseMaskedDtype,
 )
@@ -326,13 +331,29 @@ def read(self) -> DataFrame:
                 ser = frame[key]
                 if isinstance(ser.dtype, BaseMaskedDtype):
                     new_dtype[key] = ser.dtype.numpy_dtype
+                    if (
+                        key in old_dtype
+                        and not using_string_dtype()
+                        and is_string_dtype(old_dtype[key])
+                        and not isinstance(old_dtype[key], StringDtype)
+                        and ser.array._hasna
+                    ):
+                        # Cast to make sure we get "NaN" string instead of "NA"
+                        frame[key] = ser.astype(old_dtype[key])
+                        frame.loc[ser.isna(), key] = np.nan
+                        old_dtype[key] = object  # Avoid re-casting
                 elif isinstance(ser.dtype, StringDtype):
                     # We cast here in case the user passed "category" in
                     #  order to get the correct dtype.categories.dtype
                     #  e.g. test_categorical_dtype_utf16
-                    sdt = StringDtype(na_value=np.nan)
+                    if not using_string_dtype():
+                        sdt = np.dtype(object)
+                        frame[key] = ser.astype(sdt)
+                        frame.loc[ser.isna(), key] = np.nan
+                    else:
+                        sdt = StringDtype(na_value=np.nan)
+                        frame[key] = frame[key].astype(sdt)
                     new_dtype[key] = sdt  # type: ignore[assignment]
-                    frame[key] = frame[key].astype(new_dtype[key])
 
             new_dtype.update(old_dtype)
             self.dtype = new_dtype

From 96bed9d5258a2b45858c4c16d5301dbcfe666882 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Wed, 6 Aug 2025 07:22:05 -0700
Subject: [PATCH 05/39] mypy fixup

---
 pandas/io/parsers/arrow_parser_wrapper.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 039841747c9a8..09759d4127ac8 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -351,9 +351,9 @@ def read(self) -> DataFrame:
                         frame[key] = ser.astype(sdt)
                         frame.loc[ser.isna(), key] = np.nan
                     else:
-                        sdt = StringDtype(na_value=np.nan)
+                        sdt = StringDtype(na_value=np.nan)  # type: ignore[assignment]
                         frame[key] = frame[key].astype(sdt)
-                    new_dtype[key] = sdt  # type: ignore[assignment]
+                    new_dtype[key] = sdt
 
             new_dtype.update(old_dtype)
             self.dtype = new_dtype

From 1fa7e06a3d24cd3373d7fae9277ed22d26446a36 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 11:08:46 -0700
Subject: [PATCH 06/39] API: rank with nullable dtypes preserve NA

---
 doc/source/whatsnew/v3.0.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 0f8e026761db0..8be62f04f1c6e 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -97,7 +97,6 @@ Other enhancements
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:

From b7a303a8022b3169c93f2926870bfa2a97e89f20 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 3 Aug 2025 14:08:00 -0700
Subject: [PATCH 07/39] API: improve dtype in df.where with EA other

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 8be62f04f1c6e..8c775193c3ead 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -97,7 +97,7 @@ Other enhancements
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
-
+- Improve the resulting dtypes in :meth:`DataFrame.where` and :meth:`DataFrame.mask` with :class:`ExtensionDtype` ``other`` (:issue:`??`)
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
 

From c3790ca13be6de1a502e54d56ec98a7a6da3edd2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 3 Aug 2025 14:09:58 -0700
Subject: [PATCH 08/39] GH refs

---
 doc/source/whatsnew/v3.0.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 8c775193c3ead..4de2c73a493e8 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -97,7 +97,6 @@ Other enhancements
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
-- Improve the resulting dtypes in :meth:`DataFrame.where` and :meth:`DataFrame.mask` with :class:`ExtensionDtype` ``other`` (:issue:`??`)
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
 

From eb01ef743fc09dd2bd93bc7a55780124a12291da Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 3 Aug 2025 14:23:55 -0700
Subject: [PATCH 09/39] doc fixup

---
 doc/source/whatsnew/v3.0.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 4de2c73a493e8..0f8e026761db0 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -97,6 +97,8 @@ Other enhancements
 - Support passing a :class:`Iterable[Hashable]` input to :meth:`DataFrame.drop_duplicates` (:issue:`59237`)
 - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
 - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
+-
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
 

From 1bcfbeb78a7f7ceb1b9bb778555ab724c7976ae1 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Jul 2025 15:47:02 -0700
Subject: [PATCH 10/39] BUG: Decimal(NaN) incorrectly allowed in ArrowEA
 constructor with timestamp type

---
 doc/source/whatsnew/v3.0.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 0f8e026761db0..e0eb8194f3235 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -721,6 +721,8 @@ Datetimelike
 - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
 - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
+- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`??`)
+-
 
 Timedelta
 ^^^^^^^^^

From 11df1f95fbdc321a11e3e47cc05721d2699cef7e Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 3 Jul 2025 15:49:04 -0700
Subject: [PATCH 11/39] GH ref

---
 doc/source/whatsnew/v3.0.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index e0eb8194f3235..38ba80c0ace5d 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -721,7 +721,6 @@ Datetimelike
 - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
 - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
-- Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`??`)
 -
 
 Timedelta

From 606038602f57cd496294c5223ef5d5ba103989c8 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Fri, 4 Jul 2025 08:21:07 -0700
Subject: [PATCH 12/39] BUG: ArrowEA constructor with timestamp type

---
 doc/source/whatsnew/v3.0.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 38ba80c0ace5d..0f8e026761db0 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -721,7 +721,6 @@ Datetimelike
 - Bug in constructing arrays with :class:`ArrowDtype` with ``timestamp`` type incorrectly allowing ``Decimal("NaN")`` (:issue:`61773`)
 - Bug in constructing arrays with a timezone-aware :class:`ArrowDtype` from timezone-naive datetime objects incorrectly treating those as UTC times instead of wall times like :class:`DatetimeTZDtype` (:issue:`61775`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
--
 
 Timedelta
 ^^^^^^^^^

From 5e9eba70acd5dc2d0063df0fa9fffbfcd5885531 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 28 Jun 2025 10:07:44 -0700
Subject: [PATCH 13/39] POC: consistent NaN treatment for pyarrow dtypes

---
 pandas/_libs/parsers.pyx                 |  2 +-
 pandas/core/arrays/arrow/array.py        | 54 ++++++++++++++++++------
 pandas/core/arrays/string_.py            |  8 +++-
 pandas/core/generic.py                   | 19 ++++++++-
 pandas/tests/extension/test_arrow.py     |  2 +-
 pandas/tests/groupby/test_reductions.py  |  6 ++-
 pandas/tests/series/methods/test_rank.py |  9 ++++
 7 files changed, 81 insertions(+), 19 deletions(-)

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 5b94f45490da4..1f5813940c058 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -1461,7 +1461,7 @@ def _maybe_upcast(
         if isinstance(arr, IntegerArray) and arr.isna().all():
             # use null instead of int64 in pyarrow
             arr = arr.to_numpy(na_value=None)
-        arr = ArrowExtensionArray(pa.array(arr, from_pandas=True))
+        arr = ArrowExtensionArray(pa.array(arr))
 
     return arr
 
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index ad1d576bfec32..1fc97b41b8d4f 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -17,6 +17,7 @@
 import numpy as np
 
 from pandas._libs import lib
+from pandas._libs.missing import NA
 from pandas._libs.tslibs import (
     Timedelta,
     Timestamp,
@@ -353,7 +354,7 @@ def _from_sequence_of_strings(
                 # duration to string casting behavior
                 mask = isna(scalars)
                 if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
-                    strings = pa.array(strings, type=pa.string(), from_pandas=True)
+                    strings = pa.array(strings, type=pa.string())
                 strings = pc.if_else(mask, None, strings)
                 try:
                     scalars = strings.cast(pa.int64())
@@ -374,7 +375,7 @@ def _from_sequence_of_strings(
             if isinstance(strings, (pa.Array, pa.ChunkedArray)):
                 scalars = strings
             else:
-                scalars = pa.array(strings, type=pa.string(), from_pandas=True)
+                scalars = pa.array(strings, type=pa.string())
             scalars = pc.if_else(pc.equal(scalars, "1.0"), "1", scalars)
             scalars = pc.if_else(pc.equal(scalars, "0.0"), "0", scalars)
             scalars = scalars.cast(pa.bool_())
@@ -386,6 +387,13 @@ def _from_sequence_of_strings(
             from pandas.core.tools.numeric import to_numeric
 
             scalars = to_numeric(strings, errors="raise")
+            if not pa.types.is_decimal(pa_type):
+                # TODO: figure out why doing this cast breaks with decimal dtype
+                #  in test_from_sequence_of_strings_pa_array
+                mask = strings.is_null()
+                scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
+                # TODO: could we just do strings.cast(pa_type)?
+
         else:
             raise NotImplementedError(
                 f"Converting strings to {pa_type} is not implemented."
@@ -428,7 +436,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
         """
         if isinstance(value, pa.Scalar):
             pa_scalar = value
-        elif isna(value):
+        elif isna(value) and not lib.is_float(value):
             pa_scalar = pa.scalar(None, type=pa_type)
         else:
             # Workaround https://github.com/apache/arrow/issues/37291
@@ -445,7 +453,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
                     value = value.as_unit(pa_type.unit)
                 value = value._value
 
-            pa_scalar = pa.scalar(value, type=pa_type, from_pandas=True)
+            pa_scalar = pa.scalar(value, type=pa_type)
 
         if pa_type is not None and pa_scalar.type != pa_type:
             pa_scalar = pa_scalar.cast(pa_type)
@@ -477,6 +485,13 @@ def _box_pa_array(
             if copy:
                 value = value.copy()
             pa_array = value.__arrow_array__()
+
+        elif hasattr(value, "__arrow_array__"):
+            # e.g. StringArray
+            if copy:
+                value = value.copy()
+            pa_array = value.__arrow_array__()
+
         else:
             if (
                 isinstance(value, np.ndarray)
@@ -530,11 +545,24 @@ def _box_pa_array(
                 pa_array = pa.array(dta._ndarray, type=pa_type, mask=dta_mask)
                 return pa_array
 
+            mask = None
+            if getattr(value, "dtype", None) is None or value.dtype.kind not in "mfM":
+                # similar to isna(value) but exclude NaN
+                # TODO: cythonize!
+                mask = np.array([x is NA or x is None for x in value], dtype=bool)
+
+            from_pandas = False
+            if pa.types.is_integer(pa_type):
+                # If user specifically asks to cast a numpy float array with NaNs
+                #  to pyarrow integer, we'll treat those NaNs as NA
+                from_pandas = True
             try:
-                pa_array = pa.array(value, type=pa_type, from_pandas=True)
+                pa_array = pa.array(
+                    value, type=pa_type, mask=mask, from_pandas=from_pandas
+                )
             except (pa.ArrowInvalid, pa.ArrowTypeError):
                 # GH50430: let pyarrow infer type, then cast
-                pa_array = pa.array(value, from_pandas=True)
+                pa_array = pa.array(value, mask=mask, from_pandas=from_pandas)
 
             if pa_type is None and pa.types.is_duration(pa_array.type):
                 # Workaround https://github.com/apache/arrow/issues/37291
@@ -542,7 +570,7 @@ def _box_pa_array(
 
                 value = to_timedelta(value)
                 value = value.to_numpy()
-                pa_array = pa.array(value, type=pa_type, from_pandas=True)
+                pa_array = pa.array(value, type=pa_type)
 
             if pa.types.is_duration(pa_array.type) and pa_array.null_count > 0:
                 # GH52843: upstream bug for duration types when originally
@@ -1208,7 +1236,7 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
         if not len(values):
             return np.zeros(len(self), dtype=bool)
 
-        result = pc.is_in(self._pa_array, value_set=pa.array(values, from_pandas=True))
+        result = pc.is_in(self._pa_array, value_set=pa.array(values))
         # pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls
         # to False
         return np.array(result, dtype=np.bool_)
@@ -2015,7 +2043,7 @@ def __setitem__(self, key, value) -> None:
                 raise ValueError("Length of indexer and values mismatch")
             chunks = [
                 *self._pa_array[:key].chunks,
-                pa.array([value], type=self._pa_array.type, from_pandas=True),
+                pa.array([value], type=self._pa_array.type),
                 *self._pa_array[key + 1 :].chunks,
             ]
             data = pa.chunked_array(chunks).combine_chunks()
@@ -2069,7 +2097,7 @@ def _rank_calc(
                 pa_type = pa.float64()
             else:
                 pa_type = pa.uint64()
-            result = pa.array(ranked, type=pa_type, from_pandas=True)
+            result = pa.array(ranked, type=pa_type)
             return result
 
         data = self._pa_array.combine_chunks()
@@ -2321,7 +2349,7 @@ def _to_numpy_and_type(value) -> tuple[np.ndarray, pa.DataType | None]:
         right, right_type = _to_numpy_and_type(right)
         pa_type = left_type or right_type
         result = np.where(cond, left, right)
-        return pa.array(result, type=pa_type, from_pandas=True)
+        return pa.array(result, type=pa_type)
 
     @classmethod
     def _replace_with_mask(
@@ -2364,7 +2392,7 @@ def _replace_with_mask(
             replacements = replacements.as_py()
         result = np.array(values, dtype=object)
         result[mask] = replacements
-        return pa.array(result, type=values.type, from_pandas=True)
+        return pa.array(result, type=values.type)
 
     # ------------------------------------------------------------------
     # GroupBy Methods
@@ -2443,7 +2471,7 @@ def _groupby_op(
             return type(self)(pa_result)
         else:
             # DatetimeArray, TimedeltaArray
-            pa_result = pa.array(result, from_pandas=True)
+            pa_result = pa.array(result)
             return type(self)(pa_result)
 
     def _apply_elementwise(self, func: Callable) -> list[list[Any]]:
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 198dc4c483277..719686ab71a29 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -502,6 +502,12 @@ def _str_map_str_or_object(
             if self.dtype.storage == "pyarrow":
                 import pyarrow as pa
 
+                # TODO: shouldn't this already be caught by the passed mask?
+                #  it isn't in test_extract_expand_capture_groups_index
+                # mask = mask | np.array(
+                #    [x is libmissing.NA for x in result], dtype=bool
+                #    )
+
                 result = pa.array(
                     result, mask=mask, type=pa.large_string(), from_pandas=True
                 )
@@ -754,7 +760,7 @@ def __arrow_array__(self, type=None):
 
         values = self._ndarray.copy()
         values[self.isna()] = None
-        return pa.array(values, type=type, from_pandas=True)
+        return pa.array(values, type=type)
 
     def _values_for_factorize(self) -> tuple[np.ndarray, libmissing.NAType | float]:  # type: ignore[override]
         arr = self._ndarray
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 7c407b03965df..4707cb28ca060 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -9919,7 +9919,7 @@ def where(
     def where(
         self,
         cond,
-        other=np.nan,
+        other=lib.no_default,
         *,
         inplace: bool = False,
         axis: Axis | None = None,
@@ -10077,6 +10077,23 @@ def where(
                         stacklevel=2,
                     )
 
+        if other is lib.no_default:
+            if self.ndim == 1:
+                if isinstance(self.dtype, ExtensionDtype):
+                    other = self.dtype.na_value
+                else:
+                    other = np.nan
+            else:
+                if self._mgr.nblocks == 1 and isinstance(
+                    self._mgr.blocks[0].values.dtype, ExtensionDtype
+                ):
+                    # FIXME: checking this is kludgy!
+                    other = self._mgr.blocks[0].values.dtype.na_value
+                else:
+                    # FIXME: the same problem we had with Series will now
+                    #  show up column-by-column!
+                    other = np.nan
+
         other = common.apply_if_callable(other, self)
         return self._where(cond, other, inplace=inplace, axis=axis, level=level)
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index c3e1d33ec93df..4b322466a8b62 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -721,7 +721,7 @@ def test_EA_types(self, engine, data, dtype_backend, request):
                 pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")
             )
         df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
-        csv_output = df.to_csv(index=False, na_rep=np.nan)
+        csv_output = df.to_csv(index=False, na_rep=np.nan)  # should be NA?
         if pa.types.is_binary(pa_dtype):
             csv_output = BytesIO(csv_output)
         else:
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index e9527ed3a9c0e..e60e7d6bc05d4 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -379,8 +379,10 @@ def test_first_last_skipna(any_real_nullable_dtype, sort, skipna, how):
     df = DataFrame(
         {
             "a": [2, 1, 1, 2, 3, 3],
-            "b": [na_value, 3.0, na_value, 4.0, np.nan, np.nan],
-            "c": [na_value, 3.0, na_value, 4.0, np.nan, np.nan],
+            # TODO: test that has mixed na_value and NaN either working for
+            #  float or raising for int?
+            "b": [na_value, 3.0, na_value, 4.0, na_value, na_value],
+            "c": [na_value, 3.0, na_value, 4.0, na_value, na_value],
         },
         dtype=any_real_nullable_dtype,
     )
diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index ecd52b2c8498a..49142a859e434 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -280,6 +280,13 @@ def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
 
         ser = ser if dtype is None else ser.astype(dtype)
         result = ser.rank(method=method)
+        if dtype == "float64[pyarrow]":
+            # the NaNs are not treated as NA
+            exp = exp.copy()
+            if method == "average":
+                exp[np.isnan(ser)] = 9.5
+            elif method == "dense":
+                exp[np.isnan(ser)] = 6
         tm.assert_series_equal(result, Series(exp, dtype=expected_dtype(dtype, method)))
 
     @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
@@ -331,6 +338,8 @@ def test_rank_tie_methods_on_infs_nans(
             order = [ranks[1], ranks[0], ranks[2]]
         elif na_option == "bottom":
             order = [ranks[0], ranks[2], ranks[1]]
+        elif dtype == "float64[pyarrow]":
+            order = [ranks[0], [NA] * chunk, ranks[1]]
         else:
             order = [ranks[0], [np.nan] * chunk, ranks[1]]
         expected = order if ascending else order[::-1]

From 42c1190f4b273af2014b3a45ef1b212af2ef656d Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 28 Jun 2025 10:23:00 -0700
Subject: [PATCH 14/39] TST: add FIXME comment to test_setitem_frame_2d_values

---
 pandas/tests/extension/base/setitem.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
index 185d6d750cace..99ab5d2f7e86f 100644
--- a/pandas/tests/extension/base/setitem.py
+++ b/pandas/tests/extension/base/setitem.py
@@ -422,6 +422,7 @@ def test_setitem_frame_2d_values(self, data):
         df.iloc[:-1] = df.iloc[:-1].copy()
         tm.assert_frame_equal(df, orig)
 
+        # FIXME: Breaks for pyarrow float dtype bc df.values changes NAs to NaN
         df.iloc[:] = df.values
         tm.assert_frame_equal(df, orig)
 

From ca686b4ae4f2ed6006251978ee9265dd8eb9652e Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sat, 5 Jul 2025 09:41:02 -0700
Subject: [PATCH 15/39] Down to 40 failing tests

---
 pandas/_config/__init__.py           |  5 +++
 pandas/_libs/missing.pyi             |  1 +
 pandas/_libs/missing.pyx             | 18 ++++++++
 pandas/core/arrays/_utils.py         | 15 ++++++-
 pandas/core/arrays/arrow/array.py    | 66 +++++++++++++++++++---------
 pandas/core/arrays/base.py           |  3 ++
 pandas/core/arrays/masked.py         |  4 +-
 pandas/core/config_init.py           |  9 ++++
 pandas/tests/extension/test_arrow.py | 14 ++++--
 9 files changed, 109 insertions(+), 26 deletions(-)

diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
index 463e8af7cc561..fbf388224254f 100644
--- a/pandas/_config/__init__.py
+++ b/pandas/_config/__init__.py
@@ -33,3 +33,8 @@
 def using_string_dtype() -> bool:
     _mode_options = _global_config["future"]
     return _mode_options["infer_string"]
+
+
+def using_pyarrow_strict_nans() -> bool:
+    _mode_options = _global_config["mode"]
+    return _mode_options["pyarrow_strict_nans"]
diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
index 6bf30a03cef32..6c76fe49330b6 100644
--- a/pandas/_libs/missing.pyi
+++ b/pandas/_libs/missing.pyi
@@ -14,3 +14,4 @@ def isneginf_scalar(val: object) -> bool: ...
 def checknull(val: object) -> bool: ...
 def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
+def is_pdna_or_none(values: npt.ndarray) -> npt.NDArray[np.bool_]: ...
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index c7f905c4d0be0..164a47cb5adb7 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -249,6 +249,24 @@ cdef bint checknull_with_nat_and_na(object obj):
     return checknull_with_nat(obj) or obj is C_NA
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def is_pdna_or_none(values: ndarray) -> ndarray:
+    cdef:
+        ndarray[uint8_t] result
+        Py_ssize_t i, N
+        object val
+
+    N = len(values)
+    result = np.zeros(N, dtype=np.uint8)
+
+    for i in range(N):
+        val = values[i]
+        if val is None or val is C_NA:
+            result[i] = True
+    return result.view(bool)
+
+
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def is_numeric_na(values: ndarray) -> ndarray:
diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py
index 6b46396d5efdf..9adde3846ca03 100644
--- a/pandas/core/arrays/_utils.py
+++ b/pandas/core/arrays/_utils.py
@@ -7,7 +7,10 @@
 
 import numpy as np
 
+from pandas._config import using_pyarrow_strict_nans
+
 from pandas._libs import lib
+from pandas._libs.missing import NA
 from pandas.errors import LossySetitemError
 
 from pandas.core.dtypes.cast import np_can_hold_element
@@ -21,7 +24,11 @@
 
 
 def to_numpy_dtype_inference(
-    arr: ArrayLike, dtype: npt.DTypeLike | None, na_value, hasna: bool
+    arr: ArrayLike,
+    dtype: npt.DTypeLike | None,
+    na_value,
+    hasna: bool,
+    is_pyarrow: bool = True,
 ) -> tuple[npt.DTypeLike, Any]:
     if dtype is None and is_numeric_dtype(arr.dtype):
         dtype_given = False
@@ -34,7 +41,11 @@ def to_numpy_dtype_inference(
                 else:
                     dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
                 if na_value is lib.no_default:
-                    na_value = np.nan
+                    if is_pyarrow and using_pyarrow_strict_nans():
+                        na_value = NA
+                        dtype = np.dtype(object)
+                    else:
+                        na_value = np.nan
         else:
             dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
     elif dtype is not None:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 1fc97b41b8d4f..9a969ba352122 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -16,8 +16,10 @@
 
 import numpy as np
 
+from pandas._config import using_pyarrow_strict_nans
+
 from pandas._libs import lib
-from pandas._libs.missing import NA
+from pandas._libs.missing import is_pdna_or_none
 from pandas._libs.tslibs import (
     Timedelta,
     Timestamp,
@@ -326,6 +328,11 @@ def _from_sequence_of_strings(
         """
         Construct a new ExtensionArray from a sequence of strings.
         """
+        mask = isna(strings)
+
+        if isinstance(strings, cls):
+            strings = strings._pa_array
+
         pa_type = to_pyarrow_type(dtype)
         if (
             pa_type is None
@@ -344,22 +351,35 @@ def _from_sequence_of_strings(
             from pandas.core.tools.datetimes import to_datetime
 
             scalars = to_datetime(strings, errors="raise").date
+
+            if isinstance(strings, cls):
+                # Avoid an object path
+                # TODO: this assumes that pyarrows str->date casting is the
+                # same as to_datetime. Is that a fair assumption?
+                scalars = strings._pa_array.cast(pa_type)
+            else:
+                scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
+
         elif pa.types.is_duration(pa_type):
             from pandas.core.tools.timedeltas import to_timedelta
 
             scalars = to_timedelta(strings, errors="raise")
+
             if pa_type.unit != "ns":
                 # GH51175: test_from_sequence_of_strings_pa_array
                 # attempt to parse as int64 reflecting pyarrow's
                 # duration to string casting behavior
                 mask = isna(scalars)
-                if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
-                    strings = pa.array(strings, type=pa.string())
+                if isinstance(strings, cls):
+                    strings = strings._pa_array
+                elif not isinstance(strings, (pa.Array, pa.ChunkedArray)):
+                    strings = pa.array(strings, type=pa.string(), mask=mask)
                 strings = pc.if_else(mask, None, strings)
                 try:
                     scalars = strings.cast(pa.int64())
                 except pa.ArrowInvalid:
                     pass
+
         elif pa.types.is_time(pa_type):
             from pandas.core.tools.times import to_time
 
@@ -375,7 +395,7 @@ def _from_sequence_of_strings(
             if isinstance(strings, (pa.Array, pa.ChunkedArray)):
                 scalars = strings
             else:
-                scalars = pa.array(strings, type=pa.string())
+                scalars = pa.array(strings, type=pa.string(), mask=mask)
             scalars = pc.if_else(pc.equal(scalars, "1.0"), "1", scalars)
             scalars = pc.if_else(pc.equal(scalars, "0.0"), "0", scalars)
             scalars = scalars.cast(pa.bool_())
@@ -387,12 +407,16 @@ def _from_sequence_of_strings(
             from pandas.core.tools.numeric import to_numeric
 
             scalars = to_numeric(strings, errors="raise")
-            if not pa.types.is_decimal(pa_type):
+            if not pa.types.is_decimal(pa_type) and isinstance(
+                strings, (pa.Array, pa.ChunkedArray)
+            ):
                 # TODO: figure out why doing this cast breaks with decimal dtype
                 #  in test_from_sequence_of_strings_pa_array
                 mask = strings.is_null()
                 scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
                 # TODO: could we just do strings.cast(pa_type)?
+            elif mask is not None:
+                scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
 
         else:
             raise NotImplementedError(
@@ -546,23 +570,20 @@ def _box_pa_array(
                 return pa_array
 
             mask = None
-            if getattr(value, "dtype", None) is None or value.dtype.kind not in "mfM":
-                # similar to isna(value) but exclude NaN
-                # TODO: cythonize!
-                mask = np.array([x is NA or x is None for x in value], dtype=bool)
-
-            from_pandas = False
-            if pa.types.is_integer(pa_type):
-                # If user specifically asks to cast a numpy float array with NaNs
-                #  to pyarrow integer, we'll treat those NaNs as NA
-                from_pandas = True
+            if getattr(value, "dtype", None) is None or value.dtype.kind not in "mMf":
+                try:
+                    arr_value = np.asarray(value)
+                except ValueError:
+                    # e.g. list dtype with mixed-length lists
+                    arr_value = np.asarray(value, dtype=object)
+                # similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
+                mask = is_pdna_or_none(arr_value)
+
             try:
-                pa_array = pa.array(
-                    value, type=pa_type, mask=mask, from_pandas=from_pandas
-                )
+                pa_array = pa.array(value, type=pa_type, mask=mask)
             except (pa.ArrowInvalid, pa.ArrowTypeError):
                 # GH50430: let pyarrow infer type, then cast
-                pa_array = pa.array(value, mask=mask, from_pandas=from_pandas)
+                pa_array = pa.array(value, mask=mask)
 
             if pa_type is None and pa.types.is_duration(pa_array.type):
                 # Workaround https://github.com/apache/arrow/issues/37291
@@ -1517,7 +1538,11 @@ def to_numpy(
             pa.types.is_floating(pa_type)
             and (
                 na_value is np.nan
-                or (original_na_value is lib.no_default and is_float_dtype(dtype))
+                or (
+                    original_na_value is lib.no_default
+                    and is_float_dtype(dtype)
+                    and not using_pyarrow_strict_nans()
+                )
             )
         ):
             result = data._pa_array.to_numpy()
@@ -2390,6 +2415,7 @@ def _replace_with_mask(
             replacements = np.array(replacements, dtype=object)
         elif isinstance(replacements, pa.Scalar):
             replacements = replacements.as_py()
+
         result = np.array(values, dtype=object)
         result[mask] = replacements
         return pa.array(result, type=values.type)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index bfa2309bb023a..f71d2480e45e9 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -778,6 +778,9 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
 
             return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
 
+        # if dtype.kind == "U":
+        #    dtype = np.dtype(object)
+        # return self.to_numpy(dtype=dtype, copy=copy)
         if not copy:
             return np.asarray(self, dtype=dtype)
         else:
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 9c5965951da68..7f924db0dcc3b 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -497,7 +497,9 @@ def to_numpy(
         array([ True, False, False])
         """
         hasna = self._hasna
-        dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna)
+        dtype, na_value = to_numpy_dtype_inference(
+            self, dtype, na_value, hasna, is_pyarrow=False
+        )
         if dtype is None:
             dtype = object
 
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index bf7e8fb02b58e..02b600eb5fee4 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -427,6 +427,15 @@ def is_terminal() -> bool:
         validator=is_one_of_factory([True, False, "warn"]),
     )
 
+with cf.config_prefix("mode"):
+    cf.register_option(
+        "pyarrow_strict_nans",
+        True,
+        # TODO: Change this to False before merging
+        "Whether to make ArrowDtype arrays consistently treat NaN as distinct from NA",
+        validator=is_one_of_factory([True, False]),
+    )
+
 
 # user warnings
 chained_assignment = """
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 4b322466a8b62..3be812f9c1562 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -32,6 +32,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_strict_nans
+
 from pandas._libs import lib
 from pandas._libs.tslibs import timezones
 from pandas.compat import (
@@ -721,7 +723,10 @@ def test_EA_types(self, engine, data, dtype_backend, request):
                 pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")
             )
         df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
-        csv_output = df.to_csv(index=False, na_rep=np.nan)  # should be NA?
+        if using_pyarrow_strict_nans():
+            csv_output = df.to_csv(index=False, na_rep="NA")
+        else:
+            csv_output = df.to_csv(index=False, na_rep=np.nan)
         if pa.types.is_binary(pa_dtype):
             csv_output = BytesIO(csv_output)
         else:
@@ -1512,7 +1517,8 @@ def test_pickle_roundtrip(data):
 
 def test_astype_from_non_pyarrow(data):
     # GH49795
-    pd_array = data._pa_array.to_pandas().array
+    np_arr = data.to_numpy()
+    pd_array = pd.array(np_arr, dtype=np_arr.dtype)
     result = pd_array.astype(data.dtype)
     assert not isinstance(pd_array.dtype, ArrowDtype)
     assert isinstance(result.dtype, ArrowDtype)
@@ -1546,7 +1552,9 @@ def test_to_numpy_with_defaults(data):
     else:
         expected = np.array(data._pa_array)
 
-    if data._hasna and not is_numeric_dtype(data.dtype):
+    if data._hasna and (
+        not is_numeric_dtype(data.dtype) or using_pyarrow_strict_nans()
+    ):
         expected = expected.astype(object)
         expected[pd.isna(data)] = pd.NA
 

From 6cf66ef03ee7073f336b2194dc8d0f603ba3df2c Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 10:17:51 -0700
Subject: [PATCH 16/39] Fix rank, json tests

---
 pandas/io/json/_json.py                  | 14 +++++++++++++
 pandas/tests/extension/test_arrow.py     |  5 ++++-
 pandas/tests/series/methods/test_rank.py | 25 ++++++++++++++++++++----
 3 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 59911a57acc02..53a10c7a680f6 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -994,6 +994,13 @@ def _read_ujson(self) -> DataFrame | Series:
         else:
             obj = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
+            if self.dtype_backend == "pyarrow":
+                # The construction above takes "null" to NaN, which we want to
+                #  convert to NA. But .convert_dtypes to pyarrow doesn't allow
+                #  that, so we do a 2-step conversion through numpy-nullable.
+                obj = obj.convert_dtypes(
+                    infer_objects=False, dtype_backend="numpy_nullable"
+                )
             return obj.convert_dtypes(
                 infer_objects=False, dtype_backend=self.dtype_backend
             )
@@ -1071,6 +1078,13 @@ def __next__(self) -> DataFrame | Series:
             raise ex
 
         if self.dtype_backend is not lib.no_default:
+            if self.dtype_backend == "pyarrow":
+                # The construction above takes "null" to NaN, which we want to
+                #  convert to NA. But .convert_dtypes to pyarrow doesn't allow
+                #  that, so we do a 2-step conversion through numpy-nullable.
+                obj = obj.convert_dtypes(
+                    infer_objects=False, dtype_backend="numpy_nullable"
+                )
             return obj.convert_dtypes(
                 infer_objects=False, dtype_backend=self.dtype_backend
             )
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 3be812f9c1562..60a5a8d9081bc 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -285,7 +285,10 @@ def test_map(self, data_missing, na_action):
             tm.assert_numpy_array_equal(result, expected)
         else:
             result = data_missing.map(lambda x: x, na_action=na_action)
-            if data_missing.dtype == "float32[pyarrow]":
+            if (
+                data_missing.dtype == "float32[pyarrow]"
+                and not using_pyarrow_strict_nans()
+            ):
                 # map roundtrips through objects, which converts to float64
                 expected = data_missing.to_numpy(dtype="float64", na_value=np.nan)
             else:
diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index 49142a859e434..e8c49dcce31e0 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -275,7 +275,12 @@ def test_rank_signature(self):
 
     def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
         method, exp = results
-        if dtype == "int64" or (not using_infer_string and dtype == "str"):
+        if (
+            dtype == "int64"
+            or dtype == "int64[pyarrow]"
+            or dtype == "uint64[pyarrow]"
+            or (not using_infer_string and dtype == "str")
+        ):
             pytest.skip("int64/str does not support NaN")
 
         ser = ser if dtype is None else ser.astype(dtype)
@@ -287,7 +292,15 @@ def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
                 exp[np.isnan(ser)] = 9.5
             elif method == "dense":
                 exp[np.isnan(ser)] = 6
-        tm.assert_series_equal(result, Series(exp, dtype=expected_dtype(dtype, method)))
+            elif method == "max":
+                exp[np.isnan(ser)] = 10
+            elif method == "min":
+                exp[np.isnan(ser)] = 9
+            elif method == "first":
+                exp[np.isnan(ser)] = [9, 10]
+
+        expected = Series(exp, dtype=expected_dtype(dtype, method))
+        tm.assert_series_equal(result, expected)
 
     @pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
     @pytest.mark.parametrize(
@@ -406,8 +419,12 @@ def test_rank_dense_method(self, dtype, ser, exp):
 
     def test_rank_descending(self, ser, results, dtype, using_infer_string):
         method, _ = results
-        if dtype == "int64" or (not using_infer_string and dtype == "str"):
-            s = ser.dropna()
+        if (
+            dtype == "int64"
+            or dtype == "int64[pyarrow]"
+            or (not using_infer_string and dtype == "str")
+        ):
+            s = ser.dropna().astype(dtype)
         else:
             s = ser.astype(dtype)
 

From 7687f84aa1d1757638ad468c2278a0a81db0fb05 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 10:33:51 -0700
Subject: [PATCH 17/39] CLN: remove outdated comments and dead branches

---
 pandas/core/arrays/arrow/array.py      | 12 ++----------
 pandas/core/arrays/base.py             |  3 ---
 pandas/tests/extension/base/setitem.py |  1 -
 3 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 9a969ba352122..25046c17a9555 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -352,13 +352,7 @@ def _from_sequence_of_strings(
 
             scalars = to_datetime(strings, errors="raise").date
 
-            if isinstance(strings, cls):
-                # Avoid an object path
-                # TODO: this assumes that pyarrows str->date casting is the
-                # same as to_datetime. Is that a fair assumption?
-                scalars = strings._pa_array.cast(pa_type)
-            else:
-                scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
+            scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
 
         elif pa.types.is_duration(pa_type):
             from pandas.core.tools.timedeltas import to_timedelta
@@ -370,9 +364,7 @@ def _from_sequence_of_strings(
                 # attempt to parse as int64 reflecting pyarrow's
                 # duration to string casting behavior
                 mask = isna(scalars)
-                if isinstance(strings, cls):
-                    strings = strings._pa_array
-                elif not isinstance(strings, (pa.Array, pa.ChunkedArray)):
+                if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
                     strings = pa.array(strings, type=pa.string(), mask=mask)
                 strings = pc.if_else(mask, None, strings)
                 try:
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index f71d2480e45e9..bfa2309bb023a 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -778,9 +778,6 @@ def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
 
             return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
 
-        # if dtype.kind == "U":
-        #    dtype = np.dtype(object)
-        # return self.to_numpy(dtype=dtype, copy=copy)
         if not copy:
             return np.asarray(self, dtype=dtype)
         else:
diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
index 99ab5d2f7e86f..185d6d750cace 100644
--- a/pandas/tests/extension/base/setitem.py
+++ b/pandas/tests/extension/base/setitem.py
@@ -422,7 +422,6 @@ def test_setitem_frame_2d_values(self, data):
         df.iloc[:-1] = df.iloc[:-1].copy()
         tm.assert_frame_equal(df, orig)
 
-        # FIXME: Breaks for pyarrow float dtype bc df.values changes NAs to NaN
         df.iloc[:] = df.values
         tm.assert_frame_equal(df, orig)
 

From f79950d9ffa8fb80e0056c300c35c2c196dd6680 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 14:12:03 -0700
Subject: [PATCH 18/39] Fix where kludge

---
 pandas/core/arrays/arrow/array.py    |  2 ++
 pandas/core/generic.py               | 17 -----------------
 pandas/tests/extension/test_arrow.py |  7 +++++--
 3 files changed, 7 insertions(+), 19 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 25046c17a9555..59ffce66602e5 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -407,6 +407,8 @@ def _from_sequence_of_strings(
                 mask = strings.is_null()
                 scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
                 # TODO: could we just do strings.cast(pa_type)?
+            elif isinstance(strings, (pa.Array, pa.ChunkedArray)):
+                scalars = strings.cast(pa_type)
             elif mask is not None:
                 scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4707cb28ca060..a2f652d77246f 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -10077,23 +10077,6 @@ def where(
                         stacklevel=2,
                     )
 
-        if other is lib.no_default:
-            if self.ndim == 1:
-                if isinstance(self.dtype, ExtensionDtype):
-                    other = self.dtype.na_value
-                else:
-                    other = np.nan
-            else:
-                if self._mgr.nblocks == 1 and isinstance(
-                    self._mgr.blocks[0].values.dtype, ExtensionDtype
-                ):
-                    # FIXME: checking this is kludgy!
-                    other = self._mgr.blocks[0].values.dtype.na_value
-                else:
-                    # FIXME: the same problem we had with Series will now
-                    #  show up column-by-column!
-                    other = np.nan
-
         other = common.apply_if_callable(other, self)
         return self._where(cond, other, inplace=inplace, axis=axis, level=level)
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 60a5a8d9081bc..d5bf338ceef98 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1569,8 +1569,11 @@ def test_to_numpy_int_with_na():
     data = [1, None]
     arr = pd.array(data, dtype="int64[pyarrow]")
     result = arr.to_numpy()
-    expected = np.array([1, np.nan])
-    assert isinstance(result[0], float)
+    if using_pyarrow_strict_nans():
+        expected = np.array([1, pd.NA], dtype=object)
+    else:
+        expected = np.array([1, np.nan])
+        assert isinstance(result[0], float)
     tm.assert_numpy_array_equal(result, expected)
 
 

From 57cbdaa274bf1144b0650bb6ed62f16a9db1032d Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 14:21:09 -0700
Subject: [PATCH 19/39] update tests

---
 pandas/tests/extension/test_arrow.py              | 5 ++++-
 pandas/tests/frame/methods/test_convert_dtypes.py | 6 +++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index d5bf338ceef98..a762f5eccfafa 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3537,7 +3537,10 @@ def test_cast_dictionary_different_value_dtype(arrow_type):
 def test_map_numeric_na_action():
     ser = pd.Series([32, 40, None], dtype="int64[pyarrow]")
     result = ser.map(lambda x: 42, na_action="ignore")
-    expected = pd.Series([42.0, 42.0, np.nan], dtype="float64")
+    if using_pyarrow_strict_nans():
+        expected = pd.Series([42.0, 42.0, pd.NA], dtype="object")
+    else:
+        expected = pd.Series([42.0, 42.0, np.nan], dtype="float64")
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index ab847e2f8e81e..21f7811100d43 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_pyarrow_strict_nans
+
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -73,6 +75,8 @@ def test_pyarrow_dtype_backend(self):
             }
         )
         result = df.convert_dtypes(dtype_backend="pyarrow")
+
+        item = None if not using_pyarrow_strict_nans() else np.nan
         expected = pd.DataFrame(
             {
                 "a": pd.arrays.ArrowExtensionArray(
@@ -80,7 +84,7 @@ def test_pyarrow_dtype_backend(self):
                 ),
                 "b": pd.arrays.ArrowExtensionArray(pa.array(["x", "y", None])),
                 "c": pd.arrays.ArrowExtensionArray(pa.array([True, False, None])),
-                "d": pd.arrays.ArrowExtensionArray(pa.array([None, 100.5, 200.0])),
+                "d": pd.arrays.ArrowExtensionArray(pa.array([item, 100.5, 200.0])),
                 "e": pd.arrays.ArrowExtensionArray(
                     pa.array(
                         [

From e3fc3892ba1faf910049e801735847242d4728a9 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Sun, 6 Jul 2025 14:32:52 -0700
Subject: [PATCH 20/39] Fix remaining tests

---
 pandas/core/arrays/base.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index bfa2309bb023a..013a10784cd5a 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2539,6 +2539,14 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
             if result is not NotImplemented:
                 return result
 
+        # TODO: putting this here is hacky as heck
+        if self.dtype == "float64[pyarrow]":
+            # e.g. test_log_arrow_backed_missing_value
+            new_inputs = [
+                x if x is not self else x.to_numpy(na_value=np.nan) for x in inputs
+            ]
+            return getattr(ufunc, method)(*new_inputs, **kwargs)
+
         return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
 
     def map(self, mapper, na_action: Literal["ignore"] | None = None):

From 4108cc07ce026377fd9f1ab50548e6b5d9ad5cea Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 7 Jul 2025 07:49:33 -0700
Subject: [PATCH 21/39] mypy fixup

---
 pandas/_libs/missing.pyi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/missing.pyi b/pandas/_libs/missing.pyi
index 6c76fe49330b6..64256ae4b36ad 100644
--- a/pandas/_libs/missing.pyi
+++ b/pandas/_libs/missing.pyi
@@ -14,4 +14,4 @@ def isneginf_scalar(val: object) -> bool: ...
 def checknull(val: object) -> bool: ...
 def isnaobj(arr: np.ndarray) -> npt.NDArray[np.bool_]: ...
 def is_numeric_na(values: np.ndarray) -> npt.NDArray[np.bool_]: ...
-def is_pdna_or_none(values: npt.ndarray) -> npt.NDArray[np.bool_]: ...
+def is_pdna_or_none(values: np.ndarray) -> npt.NDArray[np.bool_]: ...

From b220433325145b9145dfdf7e2d29389f06e397ec Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 7 Jul 2025 12:59:54 -0700
Subject: [PATCH 22/39] old-numpy compat

---
 pandas/core/arrays/arrow/array.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 59ffce66602e5..b8bcafa24f003 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -564,12 +564,8 @@ def _box_pa_array(
                 return pa_array
 
             mask = None
-            if getattr(value, "dtype", None) is None or value.dtype.kind not in "mMf":
-                try:
-                    arr_value = np.asarray(value)
-                except ValueError:
-                    # e.g. list dtype with mixed-length lists
-                    arr_value = np.asarray(value, dtype=object)
+            if getattr(value, "dtype", None) is None or value.dtype.kind not in "iumMf":
+                arr_value = np.asarray(value, dtype=object)
                 # similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
                 mask = is_pdna_or_none(arr_value)
 

From 6ed24a0898081d947f8769effb920a5d64110b15 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 7 Jul 2025 15:45:04 -0700
Subject: [PATCH 23/39] simplify

---
 pandas/core/arrays/arrow/array.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index b8bcafa24f003..7d91ee316cbc5 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -371,7 +371,6 @@ def _from_sequence_of_strings(
                     scalars = strings.cast(pa.int64())
                 except pa.ArrowInvalid:
                     pass
-
         elif pa.types.is_time(pa_type):
             from pandas.core.tools.times import to_time
 
@@ -399,18 +398,10 @@ def _from_sequence_of_strings(
             from pandas.core.tools.numeric import to_numeric
 
             scalars = to_numeric(strings, errors="raise")
-            if not pa.types.is_decimal(pa_type) and isinstance(
-                strings, (pa.Array, pa.ChunkedArray)
-            ):
-                # TODO: figure out why doing this cast breaks with decimal dtype
-                #  in test_from_sequence_of_strings_pa_array
-                mask = strings.is_null()
-                scalars = pa.array(scalars, mask=np.array(mask), type=pa_type)
-                # TODO: could we just do strings.cast(pa_type)?
-            elif isinstance(strings, (pa.Array, pa.ChunkedArray)):
+            if isinstance(strings, (pa.Array, pa.ChunkedArray)):
                 scalars = strings.cast(pa_type)
             elif mask is not None:
-                scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
+                scalars = pa.array(scalars, mask=mask, type=pa_type)
 
         else:
             raise NotImplementedError(

From 05d4a94f9866dd3f98a30bfe90bf35736c2daad2 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 31 Jul 2025 09:36:32 -0700
Subject: [PATCH 24/39] Better option name, fixture

---
 pandas/_config/__init__.py                    |  4 +--
 pandas/conftest.py                            |  7 +++++
 pandas/core/arrays/_utils.py                  |  4 +--
 pandas/core/arrays/arrow/array.py             | 26 +++++++++++++++---
 pandas/core/config_init.py                    |  7 ++---
 pandas/tests/extension/test_arrow.py          | 27 +++++++------------
 .../frame/methods/test_convert_dtypes.py      |  6 ++---
 pandas/tests/series/methods/test_rank.py      |  1 +
 8 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py
index fbf388224254f..ee709eff2eeae 100644
--- a/pandas/_config/__init__.py
+++ b/pandas/_config/__init__.py
@@ -35,6 +35,6 @@ def using_string_dtype() -> bool:
     return _mode_options["infer_string"]
 
 
-def using_pyarrow_strict_nans() -> bool:
+def is_nan_na() -> bool:
     _mode_options = _global_config["mode"]
-    return _mode_options["pyarrow_strict_nans"]
+    return _mode_options["nan_is_na"]
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 774936be33631..d69c7e0113310 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -2116,3 +2116,10 @@ def temp_file(tmp_path):
 def monkeysession():
     with pytest.MonkeyPatch.context() as mp:
         yield mp
+
+
+@pytest.fixture(params=[True, False])
+def using_nan_is_na(request):
+    opt = request.param
+    with pd.option_context("mode.nan_is_na", opt):
+        yield opt
diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py
index 9adde3846ca03..e511b481887a9 100644
--- a/pandas/core/arrays/_utils.py
+++ b/pandas/core/arrays/_utils.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 
-from pandas._config import using_pyarrow_strict_nans
+from pandas._config import is_nan_na
 
 from pandas._libs import lib
 from pandas._libs.missing import NA
@@ -41,7 +41,7 @@ def to_numpy_dtype_inference(
                 else:
                     dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
                 if na_value is lib.no_default:
-                    if is_pyarrow and using_pyarrow_strict_nans():
+                    if is_pyarrow and not is_nan_na():
                         na_value = NA
                         dtype = np.dtype(object)
                     else:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 7d91ee316cbc5..03be3a87b0e2e 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -16,7 +16,7 @@
 
 import numpy as np
 
-from pandas._config import using_pyarrow_strict_nans
+from pandas._config import is_nan_na
 
 from pandas._libs import lib
 from pandas._libs.missing import is_pdna_or_none
@@ -35,6 +35,7 @@
 
 from pandas.core.dtypes.cast import (
     can_hold_element,
+    construct_1d_object_array_from_listlike,
     infer_dtype_from_scalar,
 )
 from pandas.core.dtypes.common import (
@@ -555,7 +556,22 @@ def _box_pa_array(
                 return pa_array
 
             mask = None
-            if getattr(value, "dtype", None) is None or value.dtype.kind not in "iumMf":
+            if is_nan_na():
+                try:
+                    arr_value = np.asarray(value)
+                    if arr_value.ndim > 1:
+                        # e.g. test_fixed_size_list we have list data.  ndim > 1
+                        #  means there were no scalar (NA) entries.
+                        mask = np.zeros(len(value), dtype=np.bool_)
+                    else:
+                        mask = isna(arr_value)
+                except ValueError:
+                    # Ragged data that numpy raises on
+                    arr_value = construct_1d_object_array_from_listlike(value)
+                    mask = isna(arr_value)
+            elif (
+                getattr(value, "dtype", None) is None or value.dtype.kind not in "iumMf"
+            ):
                 arr_value = np.asarray(value, dtype=object)
                 # similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
                 mask = is_pdna_or_none(arr_value)
@@ -1490,7 +1506,9 @@ def to_numpy(
         na_value: object = lib.no_default,
     ) -> np.ndarray:
         original_na_value = na_value
-        dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, self._hasna)
+        dtype, na_value = to_numpy_dtype_inference(
+            self, dtype, na_value, self._hasna, is_pyarrow=True
+        )
         pa_type = self._pa_array.type
         if not self._hasna or isna(na_value) or pa.types.is_null(pa_type):
             data = self
@@ -1522,7 +1540,7 @@ def to_numpy(
                 or (
                     original_na_value is lib.no_default
                     and is_float_dtype(dtype)
-                    and not using_pyarrow_strict_nans()
+                    and is_nan_na()
                 )
             )
         ):
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 02b600eb5fee4..26c4f7c080799 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -429,10 +429,11 @@ def is_terminal() -> bool:
 
 with cf.config_prefix("mode"):
     cf.register_option(
-        "pyarrow_strict_nans",
-        True,
+        "nan_is_na",
+        False,
         # TODO: Change this to False before merging
-        "Whether to make ArrowDtype arrays consistently treat NaN as distinct from NA",
+        "Whether to make ArrowDtype arrays consistently treat NaN as "
+        "interchangeable with pd.NA",
         validator=is_one_of_factory([True, False]),
     )
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index a762f5eccfafa..6de8ebf8d03ad 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -32,8 +32,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_strict_nans
-
 from pandas._libs import lib
 from pandas._libs.tslibs import timezones
 from pandas.compat import (
@@ -278,17 +276,14 @@ def test_compare_scalar(self, data, comparison_op):
         self._compare_other(ser, data, comparison_op, data[0])
 
     @pytest.mark.parametrize("na_action", [None, "ignore"])
-    def test_map(self, data_missing, na_action):
+    def test_map(self, data_missing, na_action, using_nan_is_na):
         if data_missing.dtype.kind in "mM":
             result = data_missing.map(lambda x: x, na_action=na_action)
             expected = data_missing.to_numpy(dtype=object)
             tm.assert_numpy_array_equal(result, expected)
         else:
             result = data_missing.map(lambda x: x, na_action=na_action)
-            if (
-                data_missing.dtype == "float32[pyarrow]"
-                and not using_pyarrow_strict_nans()
-            ):
+            if data_missing.dtype == "float32[pyarrow]" and using_nan_is_na:
                 # map roundtrips through objects, which converts to float64
                 expected = data_missing.to_numpy(dtype="float64", na_value=np.nan)
             else:
@@ -705,7 +700,7 @@ def test_setitem_preserves_views(self, data):
 
     @pytest.mark.parametrize("dtype_backend", ["pyarrow", no_default])
     @pytest.mark.parametrize("engine", ["c", "python"])
-    def test_EA_types(self, engine, data, dtype_backend, request):
+    def test_EA_types(self, engine, data, dtype_backend, request, using_nan_is_na):
         pa_dtype = data.dtype.pyarrow_dtype
         if pa.types.is_decimal(pa_dtype):
             request.applymarker(
@@ -726,7 +721,7 @@ def test_EA_types(self, engine, data, dtype_backend, request):
                 pytest.mark.xfail(reason="CSV parsers don't correctly handle binary")
             )
         df = pd.DataFrame({"with_dtype": pd.Series(data, dtype=str(data.dtype))})
-        if using_pyarrow_strict_nans():
+        if not using_nan_is_na:
             csv_output = df.to_csv(index=False, na_rep="NA")
         else:
             csv_output = df.to_csv(index=False, na_rep=np.nan)
@@ -1543,7 +1538,7 @@ def test_astype_errors_ignore():
     tm.assert_frame_equal(result, expected)
 
 
-def test_to_numpy_with_defaults(data):
+def test_to_numpy_with_defaults(data, using_nan_is_na):
     # GH49973
     result = data.to_numpy()
 
@@ -1555,21 +1550,19 @@ def test_to_numpy_with_defaults(data):
     else:
         expected = np.array(data._pa_array)
 
-    if data._hasna and (
-        not is_numeric_dtype(data.dtype) or using_pyarrow_strict_nans()
-    ):
+    if data._hasna and (not is_numeric_dtype(data.dtype) or not using_nan_is_na):
         expected = expected.astype(object)
         expected[pd.isna(data)] = pd.NA
 
     tm.assert_numpy_array_equal(result, expected)
 
 
-def test_to_numpy_int_with_na():
+def test_to_numpy_int_with_na(using_nan_is_na):
     # GH51227: ensure to_numpy does not convert int to float
     data = [1, None]
     arr = pd.array(data, dtype="int64[pyarrow]")
     result = arr.to_numpy()
-    if using_pyarrow_strict_nans():
+    if not using_nan_is_na:
         expected = np.array([1, pd.NA], dtype=object)
     else:
         expected = np.array([1, np.nan])
@@ -3534,10 +3527,10 @@ def test_cast_dictionary_different_value_dtype(arrow_type):
     assert result.dtypes.iloc[0] == data_type
 
 
-def test_map_numeric_na_action():
+def test_map_numeric_na_action(using_nan_is_na):
     ser = pd.Series([32, 40, None], dtype="int64[pyarrow]")
     result = ser.map(lambda x: 42, na_action="ignore")
-    if using_pyarrow_strict_nans():
+    if not using_nan_is_na:
         expected = pd.Series([42.0, 42.0, pd.NA], dtype="object")
     else:
         expected = pd.Series([42.0, 42.0, np.nan], dtype="float64")
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
index 21f7811100d43..cd850f8019ea1 100644
--- a/pandas/tests/frame/methods/test_convert_dtypes.py
+++ b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -3,8 +3,6 @@
 import numpy as np
 import pytest
 
-from pandas._config import using_pyarrow_strict_nans
-
 import pandas.util._test_decorators as td
 
 import pandas as pd
@@ -61,7 +59,7 @@ def test_convert_dtypes_retain_column_names(self):
         tm.assert_index_equal(result.columns, df.columns)
         assert result.columns.name == "cols"
 
-    def test_pyarrow_dtype_backend(self):
+    def test_pyarrow_dtype_backend(self, using_nan_is_na):
         pa = pytest.importorskip("pyarrow")
         df = pd.DataFrame(
             {
@@ -76,7 +74,7 @@ def test_pyarrow_dtype_backend(self):
         )
         result = df.convert_dtypes(dtype_backend="pyarrow")
 
-        item = None if not using_pyarrow_strict_nans() else np.nan
+        item = None if using_nan_is_na else np.nan
         expected = pd.DataFrame(
             {
                 "a": pd.arrays.ArrowExtensionArray(
diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index e8c49dcce31e0..7d3aa8f171534 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -279,6 +279,7 @@ def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
             dtype == "int64"
             or dtype == "int64[pyarrow]"
             or dtype == "uint64[pyarrow]"
+            or dtype == "float64[pyarrow]"
             or (not using_infer_string and dtype == "str")
         ):
             pytest.skip("int64/str does not support NaN")

From cbc14d5988389211cd9949d10194fe096da37567 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 31 Jul 2025 09:38:35 -0700
Subject: [PATCH 25/39] default True

---
 pandas/core/config_init.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 26c4f7c080799..a8014afb225bb 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -430,8 +430,7 @@ def is_terminal() -> bool:
 with cf.config_prefix("mode"):
     cf.register_option(
         "nan_is_na",
-        False,
-        # TODO: Change this to False before merging
+        True,
         "Whether to make ArrowDtype arrays consistently treat NaN as "
         "interchangeable with pd.NA",
         validator=is_one_of_factory([True, False]),

From a238601f7c16e9448a3db744ea6f556e1f687854 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 31 Jul 2025 11:15:58 -0700
Subject: [PATCH 26/39] Patch ops

---
 pandas/core/arrays/arrow/array.py    | 47 +++++++++++++++++++++++++++-
 pandas/core/arrays/base.py           |  8 -----
 pandas/tests/extension/test_arrow.py | 23 ++++++++++++++
 pandas/tests/series/test_npfuncs.py  |  2 +-
 4 files changed, 70 insertions(+), 10 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 03be3a87b0e2e..829931d04c7af 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -50,10 +50,16 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
+from pandas.core.dtypes.generic import (
+    ABCDataFrame,
+    ABCIndex,
+    ABCSeries,
+)
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import (
     algorithms as algos,
+    arraylike,
     missing,
     ops,
     roperator,
@@ -752,6 +758,39 @@ def __array__(
 
         return self.to_numpy(dtype=dtype, copy=copy)
 
+    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
+        if any(
+            isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
+        ):
+            return NotImplemented
+
+        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
+            self, ufunc, method, *inputs, **kwargs
+        )
+        if result is not NotImplemented:
+            return result
+
+        if "out" in kwargs:
+            return arraylike.dispatch_ufunc_with_out(
+                self, ufunc, method, *inputs, **kwargs
+            )
+
+        if method == "reduce":
+            result = arraylike.dispatch_reduction_ufunc(
+                self, ufunc, method, *inputs, **kwargs
+            )
+            if result is not NotImplemented:
+                return result
+
+        if self.dtype.kind == "f":
+            # e.g. test_log_arrow_backed_missing_value
+            new_inputs = [
+                x if x is not self else x.to_numpy(na_value=np.nan) for x in inputs
+            ]
+            return getattr(ufunc, method)(*new_inputs, **kwargs)
+
+        return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
+
     def __invert__(self) -> Self:
         # This is a bit wise op for integer types
         if pa.types.is_integer(self._pa_array.type):
@@ -923,7 +962,13 @@ def _logical_method(self, other, op) -> Self:
             return self._evaluate_op_method(other, op, ARROW_LOGICAL_FUNCS)
 
     def _arith_method(self, other, op) -> Self:
-        return self._evaluate_op_method(other, op, ARROW_ARITHMETIC_FUNCS)
+        result = self._evaluate_op_method(other, op, ARROW_ARITHMETIC_FUNCS)
+        if is_nan_na() and result.dtype.kind == "f":
+            parr = result._pa_array
+            mask = pc.is_nan(parr).to_numpy()
+            arr = pc.replace_with_mask(parr, mask, pa.scalar(None, type=parr.type))
+            result = type(self)(arr)
+        return result
 
     def equals(self, other) -> bool:
         if not isinstance(other, ArrowExtensionArray):
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 013a10784cd5a..bfa2309bb023a 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -2539,14 +2539,6 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
             if result is not NotImplemented:
                 return result
 
-        # TODO: putting this here is hacky as heck
-        if self.dtype == "float64[pyarrow]":
-            # e.g. test_log_arrow_backed_missing_value
-            new_inputs = [
-                x if x is not self else x.to_numpy(na_value=np.nan) for x in inputs
-            ]
-            return getattr(ufunc, method)(*new_inputs, **kwargs)
-
         return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
 
     def map(self, mapper, na_action: Literal["ignore"] | None = None):
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 6de8ebf8d03ad..40d60bdc3418a 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3589,3 +3589,26 @@ def test_timestamp_dtype_matches_to_datetime():
     expected = pd.Series([ts], dtype=dtype1).convert_dtypes(dtype_backend="pyarrow")
 
     tm.assert_series_equal(result, expected)
+
+
+def test_ops_with_nan_is_na(using_nan_is_na):
+    # GH#61732
+    ser = pd.Series([-1, 0, 1], dtype="int64[pyarrow]")
+
+    result = ser - np.nan
+    if using_nan_is_na:
+        assert result.isna().all()
+    else:
+        assert not result.isna().any()
+
+    result = ser * np.nan
+    if using_nan_is_na:
+        assert result.isna().all()
+    else:
+        assert not result.isna().any()
+
+    result = ser / 0
+    if using_nan_is_na:
+        assert result.isna()[1]
+    else:
+        assert not result.isna()[1]
diff --git a/pandas/tests/series/test_npfuncs.py b/pandas/tests/series/test_npfuncs.py
index 11a51c4700d5c..a681420ea6b38 100644
--- a/pandas/tests/series/test_npfuncs.py
+++ b/pandas/tests/series/test_npfuncs.py
@@ -38,7 +38,7 @@ def test_numpy_argwhere(index):
 
 
 @td.skip_if_no("pyarrow")
-def test_log_arrow_backed_missing_value():
+def test_log_arrow_backed_missing_value(using_nan_is_na):
     # GH#56285
     ser = Series([1, 2, None], dtype="float64[pyarrow]")
     result = np.log(ser)

From 3f15ca86d98f8268a37b70b5003d013469b78c4f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 31 Jul 2025 11:18:18 -0700
Subject: [PATCH 27/39] mypy fixup

---
 pandas/core/arrays/arrow/array.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 829931d04c7af..68076fefd9a65 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -580,7 +580,7 @@ def _box_pa_array(
             ):
                 arr_value = np.asarray(value, dtype=object)
                 # similar to isna(value) but exclude NaN, NaT, nat-like, nan-like
-                mask = is_pdna_or_none(arr_value)
+                mask = is_pdna_or_none(arr_value)  # type: ignore[assignment]
 
             try:
                 pa_array = pa.array(value, type=pa_type, mask=mask)

From a5d3848f6a221997a1ae3a1ae9384f985dfc2ffc Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 31 Jul 2025 11:34:24 -0700
Subject: [PATCH 28/39] Test for setitem/construction

---
 pandas/core/arrays/arrow/array.py    |  2 +-
 pandas/io/json/_json.py              | 20 +++++++----------
 pandas/tests/extension/test_arrow.py | 32 ++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 13 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 68076fefd9a65..d8cf2f23f0c7d 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -452,7 +452,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
         """
         if isinstance(value, pa.Scalar):
             pa_scalar = value
-        elif isna(value) and not lib.is_float(value):
+        elif isna(value) and not (lib.is_float(value) and not is_nan_na()):
             pa_scalar = pa.scalar(None, type=pa_type)
         else:
             # Workaround https://github.com/apache/arrow/issues/37291
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 53a10c7a680f6..1c79f24a9fd96 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -18,6 +18,8 @@
 
 import numpy as np
 
+from pandas._config import option_context
+
 from pandas._libs import lib
 from pandas._libs.json import (
     ujson_dumps,
@@ -994,16 +996,13 @@ def _read_ujson(self) -> DataFrame | Series:
         else:
             obj = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
-            if self.dtype_backend == "pyarrow":
+            with option_context("mode.nan_is_na", True):
                 # The construction above takes "null" to NaN, which we want to
                 #  convert to NA. But .convert_dtypes to pyarrow doesn't allow
-                #  that, so we do a 2-step conversion through numpy-nullable.
+                #  that unless NaN is treated as NA, so we force nan_is_na=True.
-                obj = obj.convert_dtypes(
-                    infer_objects=False, dtype_backend="numpy_nullable"
+                return obj.convert_dtypes(
+                    infer_objects=False, dtype_backend=self.dtype_backend
                 )
-            return obj.convert_dtypes(
-                infer_objects=False, dtype_backend=self.dtype_backend
-            )
         else:
             return obj
 
@@ -1078,16 +1077,13 @@ def __next__(self) -> DataFrame | Series:
             raise ex
 
         if self.dtype_backend is not lib.no_default:
-            if self.dtype_backend == "pyarrow":
+            with option_context("mode.nan_is_na", True):
                 # The construction above takes "null" to NaN, which we want to
                 #  convert to NA. But .convert_dtypes to pyarrow doesn't allow
-                #  that, so we do a 2-step conversion through numpy-nullable.
+                #  that unless NaN is treated as NA, so we force nan_is_na=True.
-                obj = obj.convert_dtypes(
-                    infer_objects=False, dtype_backend="numpy_nullable"
+                return obj.convert_dtypes(
+                    infer_objects=False, dtype_backend=self.dtype_backend
                 )
-            return obj.convert_dtypes(
-                infer_objects=False, dtype_backend=self.dtype_backend
-            )
         else:
             return obj
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 40d60bdc3418a..4d56edfa9ffae 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3612,3 +3612,35 @@ def test_ops_with_nan_is_na(using_nan_is_na):
         assert result.isna()[1]
     else:
         assert not result.isna()[1]
+
+
+def test_setitem_float_nan_is_na(using_nan_is_na):
+    # GH#61732
+    import pyarrow as pa
+
+    ser = pd.Series([-1, 0, 1], dtype="int64[pyarrow]")
+
+    if using_nan_is_na:
+        ser[1] = np.nan
+        assert ser.isna()[1]
+    else:
+        msg = "Could not convert nan with type float: tried to convert to int64"
+        with pytest.raises(pa.lib.ArrowInvalid, match=msg):
+            ser[1] = np.nan
+
+    ser = pd.Series([-1, np.nan, 1], dtype="float64[pyarrow]")
+    if using_nan_is_na:
+        assert ser.isna()[1]
+        assert ser[1] is pd.NA
+
+        ser[1] = np.nan
+        assert ser[1] is pd.NA
+
+    else:
+        assert not ser.isna()[1]
+        assert isinstance(ser[1], float)
+        assert np.isnan(ser[1])
+
+        ser[2] = np.nan
+        assert isinstance(ser[2], float)
+        assert np.isnan(ser[2])

From 670a940f6619f6bac33307550bab5be16dc4220b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 31 Jul 2025 13:28:57 -0700
Subject: [PATCH 29/39] update ufunc test

---
 pandas/core/arrays/arrow/array.py   | 39 -----------------------------
 pandas/tests/series/test_npfuncs.py | 12 ++++++---
 2 files changed, 9 insertions(+), 42 deletions(-)

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index d8cf2f23f0c7d..7aeeefbe2913a 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -50,16 +50,10 @@
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
-from pandas.core.dtypes.generic import (
-    ABCDataFrame,
-    ABCIndex,
-    ABCSeries,
-)
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import (
     algorithms as algos,
-    arraylike,
     missing,
     ops,
     roperator,
@@ -758,39 +752,6 @@ def __array__(
 
         return self.to_numpy(dtype=dtype, copy=copy)
 
-    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
-        if any(
-            isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
-        ):
-            return NotImplemented
-
-        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
-            self, ufunc, method, *inputs, **kwargs
-        )
-        if result is not NotImplemented:
-            return result
-
-        if "out" in kwargs:
-            return arraylike.dispatch_ufunc_with_out(
-                self, ufunc, method, *inputs, **kwargs
-            )
-
-        if method == "reduce":
-            result = arraylike.dispatch_reduction_ufunc(
-                self, ufunc, method, *inputs, **kwargs
-            )
-            if result is not NotImplemented:
-                return result
-
-        if self.dtype.kind == "f":
-            # e.g. test_log_arrow_backed_missing_value
-            new_inputs = [
-                x if x is not self else x.to_numpy(na_value=np.nan) for x in inputs
-            ]
-            return getattr(ufunc, method)(*new_inputs, **kwargs)
-
-        return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
-
     def __invert__(self) -> Self:
         # This is a bit wise op for integer types
         if pa.types.is_integer(self._pa_array.type):
diff --git a/pandas/tests/series/test_npfuncs.py b/pandas/tests/series/test_npfuncs.py
index a681420ea6b38..b72ac8efbaa6d 100644
--- a/pandas/tests/series/test_npfuncs.py
+++ b/pandas/tests/series/test_npfuncs.py
@@ -41,6 +41,12 @@ def test_numpy_argwhere(index):
 def test_log_arrow_backed_missing_value(using_nan_is_na):
     # GH#56285
     ser = Series([1, 2, None], dtype="float64[pyarrow]")
-    result = np.log(ser)
-    expected = np.log(Series([1, 2, None], dtype="float64"))
-    tm.assert_series_equal(result, expected)
+    if using_nan_is_na:
+        result = np.log(ser)
+        expected = np.log(Series([1, 2, None], dtype="float64"))
+        tm.assert_series_equal(result, expected)
+    else:
+        # we get cast to object which raises
+        msg = "loop of ufunc does not support argument"
+        with pytest.raises(TypeError, match=msg):
+            np.log(ser)

From 3a032a485286e87ef612e51127ffe91289740d3b Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Thu, 31 Jul 2025 13:55:06 -0700
Subject: [PATCH 30/39] Improve rank test skips

---
 pandas/tests/series/methods/test_rank.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index 7d3aa8f171534..7d96f7f862fce 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -273,20 +273,20 @@ def test_rank_signature(self):
         with pytest.raises(ValueError, match=msg):
             s.rank("average")
 
-    def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
+    def test_rank_tie_methods(
+        self, ser, results, dtype, using_infer_string, using_nan_is_na
+    ):
         method, exp = results
         if (
             dtype == "int64"
-            or dtype == "int64[pyarrow]"
-            or dtype == "uint64[pyarrow]"
-            or dtype == "float64[pyarrow]"
+            or (dtype in ["int64[pyarrow]", "uint64[pyarrow]"] and not using_nan_is_na)
             or (not using_infer_string and dtype == "str")
         ):
             pytest.skip("int64/str does not support NaN")
 
         ser = ser if dtype is None else ser.astype(dtype)
         result = ser.rank(method=method)
-        if dtype == "float64[pyarrow]":
+        if dtype == "float64[pyarrow]" and not using_nan_is_na:
             # the NaNs are not treated as NA
             exp = exp.copy()
             if method == "average":
@@ -418,11 +418,13 @@ def test_rank_dense_method(self, dtype, ser, exp):
         expected = Series(exp).astype(expected_dtype(dtype, "dense"))
         tm.assert_series_equal(result, expected)
 
-    def test_rank_descending(self, ser, results, dtype, using_infer_string):
+    def test_rank_descending(
+        self, ser, results, dtype, using_infer_string, using_nan_is_na
+    ):
         method, _ = results
         if (
             dtype == "int64"
-            or dtype == "int64[pyarrow]"
+            or (dtype in ["int64[pyarrow]"] and not using_nan_is_na)
             or (not using_infer_string and dtype == "str")
         ):
             s = ser.dropna().astype(dtype)

From c59b9de0d4978b49364f03ec78cd11a8170b724d Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 08:35:37 -0700
Subject: [PATCH 31/39] ENH: mode.nan_is_na for numpy-nullable dtypes

---
 doc/source/user_guide/text.rst                |  2 +-
 pandas/_libs/parsers.pyx                      |  5 +-
 pandas/core/algorithms.py                     | 11 +++-
 pandas/core/arrays/_utils.py                  |  3 +-
 pandas/core/arrays/arrow/array.py             | 21 ++++---
 pandas/core/arrays/masked.py                  | 15 +++--
 pandas/core/arrays/numeric.py                 | 25 +++++++-
 pandas/core/config_init.py                    |  8 +--
 pandas/core/dtypes/cast.py                    | 15 ++++-
 pandas/core/indexes/base.py                   | 10 ++++
 pandas/core/internals/construction.py         | 29 ++++++---
 pandas/io/json/_json.py                       |  6 --
 pandas/io/json/_table_schema.py               |  5 +-
 pandas/io/parsers/arrow_parser_wrapper.py     |  4 +-
 .../tests/arrays/floating/test_arithmetic.py  | 30 ++++++----
 .../tests/arrays/floating/test_comparison.py  | 12 +++-
 .../arrays/floating/test_construction.py      | 13 +++-
 pandas/tests/arrays/floating/test_contains.py |  7 ++-
 pandas/tests/arrays/floating/test_function.py | 46 +++++++++++----
 pandas/tests/arrays/floating/test_to_numpy.py | 22 +++++--
 .../tests/arrays/integer/test_arithmetic.py   | 37 ++++++++----
 .../tests/arrays/integer/test_construction.py | 46 ++++++++++++---
 pandas/tests/arrays/integer/test_function.py  | 59 ++++++++++++++-----
 pandas/tests/arrays/integer/test_reduction.py |  4 +-
 .../arrays/interval/test_interval_pyarrow.py  |  2 +-
 pandas/tests/arrays/masked/test_function.py   | 15 ++---
 pandas/tests/arrays/string_/test_string.py    |  3 +-
 pandas/tests/base/test_conversion.py          |  5 +-
 pandas/tests/base/test_unique.py              |  8 ++-
 pandas/tests/extension/base/interface.py      | 18 +++++-
 pandas/tests/extension/test_masked.py         | 16 +++--
 pandas/tests/frame/methods/test_astype.py     | 12 +++-
 pandas/tests/frame/methods/test_replace.py    |  4 +-
 pandas/tests/frame/test_reductions.py         | 12 +++-
 pandas/tests/groupby/methods/test_quantile.py |  6 +-
 pandas/tests/groupby/test_reductions.py       |  5 +-
 .../tests/indexes/multi/test_constructors.py  |  2 +-
 pandas/tests/indexes/numeric/test_indexing.py | 53 +++++++++++------
 pandas/tests/indexing/test_iloc.py            | 11 ++--
 pandas/tests/indexing/test_loc.py             |  2 +-
 pandas/tests/reshape/test_cut.py              |  4 +-
 .../series/accessors/test_dt_accessor.py      |  4 +-
 pandas/tests/series/methods/test_case_when.py |  3 +-
 pandas/tests/series/methods/test_clip.py      | 13 ++--
 .../series/methods/test_convert_dtypes.py     | 11 ++++
 pandas/tests/series/methods/test_rank.py      | 28 +++++++--
 46 files changed, 480 insertions(+), 192 deletions(-)

diff --git a/doc/source/user_guide/text.rst b/doc/source/user_guide/text.rst
index 3bb151a2dd339..11d5ab86e76ef 100644
--- a/doc/source/user_guide/text.rst
+++ b/doc/source/user_guide/text.rst
@@ -75,7 +75,7 @@ or convert from existing pandas data:
 
 .. ipython:: python
 
-   s1 = pd.Series([1, 2, np.nan], dtype="Int64")
+   s1 = pd.Series([1, 2, pd.NA], dtype="Int64")
    s1
    s2 = s1.astype("string")
    s2
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index 1f5813940c058..a25fedc8d33f4 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -8,6 +8,8 @@ from csv import (
 )
 import warnings
 
+from pandas._config import is_nan_na
+
 from pandas.util._exceptions import find_stack_level
 
 from pandas import StringDtype
@@ -43,7 +45,6 @@ from libc.string cimport (
     strncpy,
 )
 
-
 import numpy as np
 
 cimport numpy as cnp
@@ -1461,7 +1462,7 @@ def _maybe_upcast(
         if isinstance(arr, IntegerArray) and arr.isna().all():
             # use null instead of int64 in pyarrow
             arr = arr.to_numpy(na_value=None)
-        arr = ArrowExtensionArray(pa.array(arr))
+        arr = ArrowExtensionArray(pa.array(arr, from_pandas=is_nan_na()))
 
     return arr
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 533b9b689af0b..c14ab2bc02da2 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1065,7 +1065,16 @@ def rank(
         (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
     """
     is_datetimelike = needs_i8_conversion(values.dtype)
-    values = _ensure_data(values)
+    if (
+        isinstance(values.dtype, BaseMaskedDtype)
+        and values._hasna
+        and values.dtype.kind in "iuf"
+    ):
+        # e.g. test_rank_ea_small_values
+        # TODO: fix the object-dtype path bug that this special-casing works around
+        values = values.to_numpy(dtype=np.float64, na_value=np.nan)
+    else:
+        values = _ensure_data(values)
 
     if values.ndim == 1:
         ranks = algos.rank_1d(
diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py
index e511b481887a9..67ce39de75b20 100644
--- a/pandas/core/arrays/_utils.py
+++ b/pandas/core/arrays/_utils.py
@@ -28,7 +28,6 @@ def to_numpy_dtype_inference(
     dtype: npt.DTypeLike | None,
     na_value,
     hasna: bool,
-    is_pyarrow: bool = True,
 ) -> tuple[npt.DTypeLike, Any]:
     if dtype is None and is_numeric_dtype(arr.dtype):
         dtype_given = False
@@ -41,7 +40,7 @@ def to_numpy_dtype_inference(
                 else:
                     dtype = arr.dtype.numpy_dtype  # type: ignore[union-attr]
                 if na_value is lib.no_default:
-                    if is_pyarrow and not is_nan_na():
+                    if not is_nan_na():
                         na_value = NA
                         dtype = np.dtype(object)
                     else:
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 7aeeefbe2913a..8ec3d37236b17 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -352,9 +352,7 @@ def _from_sequence_of_strings(
             from pandas.core.tools.datetimes import to_datetime
 
             scalars = to_datetime(strings, errors="raise").date
-
-            scalars = pa.array(scalars, mask=mask.view(bool), type=pa_type)
-
+            scalars = pa.array(scalars, type=pa_type, mask=mask)
         elif pa.types.is_duration(pa_type):
             from pandas.core.tools.timedeltas import to_timedelta
 
@@ -965,7 +963,10 @@ def __len__(self) -> int:
     def __contains__(self, key) -> bool:
         # https://github.com/pandas-dev/pandas/pull/51307#issuecomment-1426372604
         if isna(key) and key is not self.dtype.na_value:
-            if self.dtype.kind == "f" and lib.is_float(key):
+            if lib.is_float(key) and is_nan_na():
+                return self.dtype.na_value in self
+            elif self.dtype.kind == "f" and lib.is_float(key):
+                # Check specifically for NaN
                 return pc.any(pc.is_nan(self._pa_array)).as_py()
 
             # e.g. date or timestamp types we do not allow None here to match pd.NA
@@ -1512,9 +1513,7 @@ def to_numpy(
         na_value: object = lib.no_default,
     ) -> np.ndarray:
         original_na_value = na_value
-        dtype, na_value = to_numpy_dtype_inference(
-            self, dtype, na_value, self._hasna, is_pyarrow=True
-        )
+        dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, self._hasna)
         pa_type = self._pa_array.type
         if not self._hasna or isna(na_value) or pa.types.is_null(pa_type):
             data = self
@@ -2073,7 +2072,7 @@ def __setitem__(self, key, value) -> None:
                 raise ValueError("Length of indexer and values mismatch")
             chunks = [
                 *self._pa_array[:key].chunks,
-                pa.array([value], type=self._pa_array.type),
+                pa.array([value], type=self._pa_array.type, from_pandas=is_nan_na()),
                 *self._pa_array[key + 1 :].chunks,
             ]
             data = pa.chunked_array(chunks).combine_chunks()
@@ -2127,7 +2126,7 @@ def _rank_calc(
                 pa_type = pa.float64()
             else:
                 pa_type = pa.uint64()
-            result = pa.array(ranked, type=pa_type)
+            result = pa.array(ranked, type=pa_type, from_pandas=is_nan_na())
             return result
 
         data = self._pa_array.combine_chunks()
@@ -2379,7 +2378,7 @@ def _to_numpy_and_type(value) -> tuple[np.ndarray, pa.DataType | None]:
         right, right_type = _to_numpy_and_type(right)
         pa_type = left_type or right_type
         result = np.where(cond, left, right)
-        return pa.array(result, type=pa_type)
+        return pa.array(result, type=pa_type, from_pandas=is_nan_na())
 
     @classmethod
     def _replace_with_mask(
@@ -2423,7 +2422,7 @@ def _replace_with_mask(
 
         result = np.array(values, dtype=object)
         result[mask] = replacements
-        return pa.array(result, type=values.type)
+        return pa.array(result, type=values.type, from_pandas=is_nan_na())
 
     # ------------------------------------------------------------------
     # GroupBy Methods
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 7f924db0dcc3b..4005138b54850 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -11,6 +11,8 @@
 
 import numpy as np
 
+from pandas._config import is_nan_na
+
 from pandas._libs import (
     algos as libalgos,
     lib,
@@ -310,7 +312,9 @@ def __setitem__(self, key, value) -> None:
     def __contains__(self, key) -> bool:
         if isna(key) and key is not self.dtype.na_value:
             # GH#52840
-            if self._data.dtype.kind == "f" and lib.is_float(key):
+            if lib.is_float(key) and is_nan_na():
+                key = self.dtype.na_value
+            elif self._data.dtype.kind == "f" and lib.is_float(key):
                 return bool((np.isnan(self._data) & ~self._mask).any())
 
         return bool(super().__contains__(key))
@@ -497,9 +501,7 @@ def to_numpy(
         array([ True, False, False])
         """
         hasna = self._hasna
-        dtype, na_value = to_numpy_dtype_inference(
-            self, dtype, na_value, hasna, is_pyarrow=False
-        )
+        dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna)
         if dtype is None:
             dtype = object
 
@@ -670,6 +672,8 @@ def reconstruct(x: np.ndarray):
                     # reached in e.g. np.sqrt on BooleanArray
                     # we don't support float16
                     x = x.astype(np.float32)
+                if is_nan_na():
+                    m[np.isnan(x)] = True
                 return FloatingArray(x, m)
             else:
                 x[mask] = np.nan
@@ -875,6 +879,9 @@ def _maybe_mask_result(
         if result.dtype.kind == "f":
             from pandas.core.arrays import FloatingArray
 
+            if is_nan_na():
+                mask[np.isnan(result)] = True
+
             return FloatingArray(result, mask, copy=False)
 
         elif result.dtype.kind == "b":
diff --git a/pandas/core/arrays/numeric.py b/pandas/core/arrays/numeric.py
index f319a3cc05575..27ff4b7563ba9 100644
--- a/pandas/core/arrays/numeric.py
+++ b/pandas/core/arrays/numeric.py
@@ -8,6 +8,8 @@
 
 import numpy as np
 
+from pandas._config import is_nan_na
+
 from pandas._libs import (
     lib,
     missing as libmissing,
@@ -101,6 +103,8 @@ def __from_arrow__(
                 array = array.combine_chunks()
 
         data, mask = pyarrow_array_to_numpy_and_mask(array, dtype=self.numpy_dtype)
+        if data.dtype.kind == "f" and is_nan_na():
+            mask[np.isnan(data)] = False
         return array_class(data.copy(), ~mask, copy=False)
 
     @classmethod
@@ -195,9 +199,21 @@ def _coerce_to_data_and_mask(
         elif values.dtype.kind == "f":
             # np.isnan is faster than is_numeric_na() for floats
             # github issue: #60066
-            mask = np.isnan(values)
+            if is_nan_na():
+                mask = np.isnan(values)
+            else:
+                mask = np.zeros(len(values), dtype=np.bool_)
+                if dtype_cls.__name__.strip("_").startswith(("I", "U")):
+                    wrong = np.isnan(values)
+                    if wrong.any():
+                        raise ValueError("Cannot cast NaN value to Integer dtype.")
         else:
-            mask = libmissing.is_numeric_na(values)
+            if is_nan_na():
+                mask = libmissing.is_numeric_na(values)
+            else:
+                # is_numeric_na will raise on non-numeric NAs
+                libmissing.is_numeric_na(values)
+                mask = libmissing.is_pdna_or_none(values)
     else:
         assert len(mask) == len(values)
 
@@ -236,7 +252,6 @@ def _coerce_to_data_and_mask(
         values = values.astype(dtype, copy=copy)
     else:
         values = dtype_cls._safe_cast(values, dtype, copy=False)
-
     return values, mask, dtype, inferred_type
 
 
@@ -265,6 +280,10 @@ def __init__(
             # If we don't raise here, then accessing self.dtype would raise
             raise TypeError("FloatingArray does not support np.float16 dtype.")
 
+        # NB: if is_nan_na() is True, the caller is responsible for ensuring
+        #  that every NaN entry in `values` is also masked, i.e. that
+        #  mask[np.isnan(values)].all() holds.
+
         super().__init__(values, mask, copy=copy)
 
     @cache_readonly
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index a8014afb225bb..1478380d90a7d 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -427,12 +427,12 @@ def is_terminal() -> bool:
         validator=is_one_of_factory([True, False, "warn"]),
     )
 
-with cf.config_prefix("mode"):
     cf.register_option(
         "nan_is_na",
-        True,
-        "Whether to make ArrowDtype arrays consistently treat NaN as "
-        "interchangeable with pd.NA",
+        os.environ.get("PANDAS_NAN_IS_NA", 0) == "1",
+        "Whether to treat NaN entries as interchangeable with pd.NA in "
+        "numpy-nullable and pyarrow float dtypes. See discussion in "
+        "https://github.com/pandas-dev/pandas/issues/32265",
         validator=is_one_of_factory([True, False]),
     )
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 20fe9b92b4677..a8ff49fac543d 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -18,7 +18,10 @@
 
 import numpy as np
 
-from pandas._config import using_string_dtype
+from pandas._config import (
+    is_nan_na,
+    using_string_dtype,
+)
 
 from pandas._libs import (
     Interval,
@@ -1053,7 +1056,10 @@ def convert_dtypes(
             elif input_array.dtype.kind in "fcb":
                 # TODO: de-dup with maybe_cast_to_integer_array?
                 arr = input_array[notna(input_array)]
-                if (arr.astype(int) == arr).all():
+                if len(arr) < len(input_array) and not is_nan_na():
+                    # In the presence of NaNs, we cannot convert to IntegerDtype
+                    pass
+                elif (arr.astype(int) == arr).all():
                     inferred_dtype = target_int_dtype
                 else:
                     inferred_dtype = input_array.dtype
@@ -1077,7 +1083,10 @@ def convert_dtypes(
                 if convert_integer:
                     # TODO: de-dup with maybe_cast_to_integer_array?
                     arr = input_array[notna(input_array)]
-                    if (arr.astype(int) == arr).all():
+                    if len(arr) < len(input_array) and not is_nan_na():
+                        # In the presence of NaNs, we can't convert to IntegerDtype
+                        inferred_dtype = inferred_float_dtype
+                    elif (arr.astype(int) == arr).all():
                         inferred_dtype = pandas_dtype_func("Int64")
                     else:
                         inferred_dtype = inferred_float_dtype
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index e8c5a03a6de50..be1c8365e640c 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -21,6 +21,7 @@
 
 from pandas._config import (
     get_option,
+    is_nan_na,
     using_string_dtype,
 )
 
@@ -161,6 +162,7 @@
     ExtensionArray,
     TimedeltaArray,
 )
+from pandas.core.arrays.floating import FloatingDtype
 from pandas.core.arrays.string_ import (
     StringArray,
     StringDtype,
@@ -6575,6 +6577,14 @@ def _maybe_cast_indexer(self, key):
         If we have a float key and are not a floating index, then try to cast
         to an int if equivalent.
         """
+        if (
+            is_float(key)
+            and np.isnan(key)
+            and isinstance(self.dtype, FloatingDtype)
+            and is_nan_na()
+        ):
+            # TODO: better place to do this?
+            key = self.dtype.na_value
         return key
 
     def _maybe_cast_listlike_indexer(self, target) -> Index:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 35de97d570bd3..7de508e5a30bc 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -17,6 +17,7 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import lib
+from pandas._libs.missing import NA
 
 from pandas.core.dtypes.astype import astype_is_view
 from pandas.core.dtypes.cast import (
@@ -34,7 +35,10 @@
     is_object_dtype,
     is_scalar,
 )
-from pandas.core.dtypes.dtypes import ExtensionDtype
+from pandas.core.dtypes.dtypes import (
+    BaseMaskedDtype,
+    ExtensionDtype,
+)
 from pandas.core.dtypes.generic import (
     ABCDataFrame,
     ABCSeries,
@@ -364,7 +368,11 @@ def dict_to_mgr(
 
     if columns is not None:
         columns = ensure_index(columns)
-        arrays = [np.nan] * len(columns)
+        if dtype is not None and not isinstance(dtype, np.dtype):
+            # e.g. test_dataframe_from_dict_of_series
+            arrays = [NA] * len(columns)
+        else:
+            arrays = [np.nan] * len(columns)
         midxs = set()
         data_keys = ensure_index(data.keys())  # type: ignore[arg-type]
         data_values = list(data.values())
@@ -414,12 +422,14 @@ def dict_to_mgr(
         arrays = [
             x.copy()
             if isinstance(x, ExtensionArray)
-            else x.copy(deep=True)
-            if (
-                isinstance(x, Index)
-                or (isinstance(x, ABCSeries) and is_1d_only_ea_dtype(x.dtype))
+            else (
+                x.copy(deep=True)
+                if (
+                    isinstance(x, Index)
+                    or (isinstance(x, ABCSeries) and is_1d_only_ea_dtype(x.dtype))
+                )
+                else x
             )
-            else x
             for x in arrays
         ]
 
@@ -949,10 +959,13 @@ def convert_object_array(
 
     def convert(arr):
         if dtype != np.dtype("O"):
+            # e.g. if dtype is UInt32 then we want to cast Nones to NA instead of
+            #  NaN in maybe_convert_objects.
+            to_nullable = dtype_backend != "numpy" or isinstance(dtype, BaseMaskedDtype)
             arr = lib.maybe_convert_objects(
                 arr,
                 try_float=coerce_float,
-                convert_to_nullable_dtype=dtype_backend != "numpy",
+                convert_to_nullable_dtype=to_nullable,
             )
             # Notes on cases that get here 2023-02-15
             # 1) we DO get here when arr is all Timestamps and dtype=None
diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 1c79f24a9fd96..408a2f290c477 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -997,9 +997,6 @@ def _read_ujson(self) -> DataFrame | Series:
             obj = self._get_object_parser(self.data)
         if self.dtype_backend is not lib.no_default:
             with option_context("mode.nan_is_na", True):
-                # The construction above takes "null" to NaN, which we want to
-                #  convert to NA. But .convert_dtypes to pyarrow doesn't allow
-                #  that, so we do a 2-step conversion through numpy-nullable.
                 return obj.convert_dtypes(
                     infer_objects=False, dtype_backend=self.dtype_backend
                 )
@@ -1078,9 +1075,6 @@ def __next__(self) -> DataFrame | Series:
 
         if self.dtype_backend is not lib.no_default:
             with option_context("mode.nan_is_na", True):
-                # The construction above takes "null" to NaN, which we want to
-                #  convert to NA. But .convert_dtypes to pyarrow doesn't allow
-                #  that, so we do a 2-step conversion through numpy-nullable.
                 return obj.convert_dtypes(
                     infer_objects=False, dtype_backend=self.dtype_backend
                 )
diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py
index feca60c6e28a2..5510036e542f5 100644
--- a/pandas/io/json/_table_schema.py
+++ b/pandas/io/json/_table_schema.py
@@ -13,6 +13,8 @@
 )
 import warnings
 
+from pandas._config import option_context
+
 from pandas._libs import lib
 from pandas._libs.json import ujson_loads
 from pandas._libs.tslibs import timezones
@@ -384,7 +386,8 @@ def parse_table_schema(json, precise_float: bool) -> DataFrame:
             'table="orient" can not yet read ISO-formatted Timedelta data'
         )
 
-    df = df.astype(dtypes)
+    with option_context("mode.nan_is_na", True):
+        df = df.astype(dtypes)
 
     if "primaryKey" in table["schema"]:
         df = df.set_index(table["schema"]["primaryKey"])
diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
index 09759d4127ac8..d48e888ae3838 100644
--- a/pandas/io/parsers/arrow_parser_wrapper.py
+++ b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -19,9 +19,7 @@
     is_string_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.dtypes import (
-    BaseMaskedDtype,
-)
+from pandas.core.dtypes.dtypes import BaseMaskedDtype
 from pandas.core.dtypes.inference import is_integer
 
 from pandas.core.arrays.string_ import StringDtype
diff --git a/pandas/tests/arrays/floating/test_arithmetic.py b/pandas/tests/arrays/floating/test_arithmetic.py
index 777099e76fc73..e4e26383ae42c 100644
--- a/pandas/tests/arrays/floating/test_arithmetic.py
+++ b/pandas/tests/arrays/floating/test_arithmetic.py
@@ -35,21 +35,24 @@ def test_array_op(dtype, opname, exp):
 
 
 @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
-def test_divide_by_zero(dtype, zero, negative):
+def test_divide_by_zero(dtype, zero, negative, using_nan_is_na):
     # TODO pending NA/NaN discussion
     # https://github.com/pandas-dev/pandas/issues/32265/
     a = pd.array([0, 1, -1, None], dtype=dtype)
     result = a / zero
+    exp_mask = np.array([False, False, False, True])
+    if using_nan_is_na:
+        exp_mask[[0, -1]] = True
     expected = FloatingArray(
         np.array([np.nan, np.inf, -np.inf, np.nan], dtype=dtype.numpy_dtype),
-        np.array([False, False, False, True]),
+        exp_mask,
     )
     if negative:
         expected *= -1
     tm.assert_extension_array_equal(result, expected)
 
 
-def test_pow_scalar(dtype):
+def test_pow_scalar(dtype, using_nan_is_na):
     a = pd.array([-1, 0, 1, None, 2], dtype=dtype)
     result = a**0
     expected = pd.array([1, 1, 1, 1, 1], dtype=dtype)
@@ -64,11 +67,14 @@ def test_pow_scalar(dtype):
     tm.assert_extension_array_equal(result, expected)
 
     result = a**np.nan
-    # TODO np.nan should be converted to pd.NA / missing before operation?
-    expected = FloatingArray(
-        np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype),
-        mask=a._mask,
-    )
+    if using_nan_is_na:
+        expected = pd.array([None, None, 1, None, None], dtype=dtype)
+    else:
+        # TODO np.nan should be converted to pd.NA / missing before operation?
+        expected = FloatingArray(
+            np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype=dtype.numpy_dtype),
+            mask=a._mask,
+        )
     tm.assert_extension_array_equal(result, expected)
 
     # reversed
@@ -87,9 +93,11 @@ def test_pow_scalar(dtype):
     tm.assert_extension_array_equal(result, expected)
 
     result = np.nan**a
-    expected = FloatingArray(
-        np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), mask=a._mask
-    )
+    if not using_nan_is_na:
+        # Otherwise the previous `expected` can be reused
+        expected = FloatingArray(
+            np.array([1, np.nan, np.nan, np.nan], dtype=dtype.numpy_dtype), mask=a._mask
+        )
     tm.assert_extension_array_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/floating/test_comparison.py b/pandas/tests/arrays/floating/test_comparison.py
index a429649f1ce1d..0990757964267 100644
--- a/pandas/tests/arrays/floating/test_comparison.py
+++ b/pandas/tests/arrays/floating/test_comparison.py
@@ -38,11 +38,15 @@ def test_equals():
     assert a1.equals(a2) is False
 
 
-def test_equals_nan_vs_na():
+def test_equals_nan_vs_na(using_nan_is_na):
     # GH#44382
 
     mask = np.zeros(3, dtype=bool)
     data = np.array([1.0, np.nan, 3.0], dtype=np.float64)
+    if using_nan_is_na:
+        # Under PDEP16, all callers of the FloatingArray constructor should
+        #  ensure that mask[np.isnan(data)] = True
+        mask[1] = True
 
     left = FloatingArray(data, mask)
     assert left.equals(left)
@@ -57,7 +61,11 @@ def test_equals_nan_vs_na():
     assert right.equals(right)
     tm.assert_extension_array_equal(right, right)
 
-    assert not left.equals(right)
+    if not using_nan_is_na:
+        assert not left.equals(right)
+    else:
+        # the constructor will set the NaN locations to NA
+        assert left.equals(right)
 
     # with mask[1] = True, the only difference is data[1], which should
     #  not matter for equals
diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py
index e1d237205a753..9c383efa3216c 100644
--- a/pandas/tests/arrays/floating/test_construction.py
+++ b/pandas/tests/arrays/floating/test_construction.py
@@ -85,9 +85,12 @@ def test_to_array():
         ([np.nan], [pd.NA]),
     ],
 )
-def test_to_array_none_is_nan(a, b):
+def test_to_array_none_is_nan(a, b, using_nan_is_na):
     result = pd.array(a, dtype="Float64")
     expected = pd.array(b, dtype="Float64")
+    if not using_nan_is_na and a[-1] is np.nan:
+        assert np.isnan(result[-1])
+        expected._mask[-1] = False
     tm.assert_extension_array_equal(result, expected)
 
 
@@ -189,13 +192,17 @@ def test_to_array_bool(bool_values, values, target_dtype, expected_dtype):
     tm.assert_extension_array_equal(result, expected)
 
 
-def test_series_from_float(data):
+def test_series_from_float(data, using_nan_is_na):
     # construct from our dtype & string dtype
     dtype = data.dtype
 
     # from float
     expected = pd.Series(data)
-    result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype))
+    np_res = data.to_numpy(na_value=np.nan, dtype="float")
+    if not using_nan_is_na:
+        np_res = np_res.astype(object)
+        np_res[data.isna()] = pd.NA
+    result = pd.Series(np_res, dtype=str(dtype))
     tm.assert_series_equal(result, expected)
 
     # from list
diff --git a/pandas/tests/arrays/floating/test_contains.py b/pandas/tests/arrays/floating/test_contains.py
index 956642697bf32..5dff4b803d87d 100644
--- a/pandas/tests/arrays/floating/test_contains.py
+++ b/pandas/tests/arrays/floating/test_contains.py
@@ -3,10 +3,13 @@
 import pandas as pd
 
 
-def test_contains_nan():
+def test_contains_nan(using_nan_is_na):
     # GH#52840
     arr = pd.array(range(5)) / 0
 
     assert np.isnan(arr._data[0])
-    assert not arr.isna()[0]
+    if using_nan_is_na:
+        assert arr.isna()[0]
+    else:
+        assert not arr.isna()[0]
     assert np.nan in arr
diff --git a/pandas/tests/arrays/floating/test_function.py b/pandas/tests/arrays/floating/test_function.py
index dffb2a1f6e1f5..e03e8f30197b9 100644
--- a/pandas/tests/arrays/floating/test_function.py
+++ b/pandas/tests/arrays/floating/test_function.py
@@ -10,10 +10,13 @@
 @pytest.mark.parametrize("ufunc", [np.abs, np.sign])
 # np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
 @pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
-def test_ufuncs_single(ufunc):
+def test_ufuncs_single(ufunc, using_nan_is_na):
     a = pd.array([1, 2, -3, pd.NA], dtype="Float64")
     result = ufunc(a)
-    expected = pd.array(ufunc(a.astype(float)), dtype="Float64")
+    np_res = ufunc(a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Float64")
     tm.assert_extension_array_equal(result, expected)
 
     s = pd.Series(a)
@@ -23,45 +26,66 @@ def test_ufuncs_single(ufunc):
 
 
 @pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
-def test_ufuncs_single_float(ufunc):
+def test_ufuncs_single_float(ufunc, using_nan_is_na):
     a = pd.array([1.0, 0.2, 3.0, pd.NA], dtype="Float64")
     with np.errstate(invalid="ignore"):
         result = ufunc(a)
-        expected = pd.array(ufunc(a.astype(float)), dtype="Float64")
+        np_res = ufunc(a.astype(float))
+        np_res = np_res.astype(object)
+        np_res[a.isna()] = pd.NA
+        expected = pd.array(np_res, dtype="Float64")
     tm.assert_extension_array_equal(result, expected)
 
     s = pd.Series(a)
     with np.errstate(invalid="ignore"):
         result = ufunc(s)
-        expected = pd.Series(ufunc(s.astype(float)), dtype="Float64")
+        np_res = ufunc(s.astype(float))
+        np_res = np_res.astype(object)
+        np_res[a.isna()] = pd.NA
+        expected = pd.Series(np_res, dtype="Float64")
     tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize("ufunc", [np.add, np.subtract])
-def test_ufuncs_binary_float(ufunc):
+def test_ufuncs_binary_float(ufunc, using_nan_is_na):
     # two FloatingArrays
     a = pd.array([1, 0.2, -3, pd.NA], dtype="Float64")
     result = ufunc(a, a)
-    expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Float64")
+    np_res = ufunc(a.astype(float), a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Float64")
     tm.assert_extension_array_equal(result, expected)
 
     # FloatingArray with numpy array
     arr = np.array([1, 2, 3, 4])
     result = ufunc(a, arr)
-    expected = pd.array(ufunc(a.astype(float), arr), dtype="Float64")
+    np_res = ufunc(a.astype(float), arr)
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Float64")
     tm.assert_extension_array_equal(result, expected)
 
     result = ufunc(arr, a)
-    expected = pd.array(ufunc(arr, a.astype(float)), dtype="Float64")
+    np_res = ufunc(arr, a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Float64")
     tm.assert_extension_array_equal(result, expected)
 
     # FloatingArray with scalar
     result = ufunc(a, 1)
-    expected = pd.array(ufunc(a.astype(float), 1), dtype="Float64")
+    np_res = ufunc(a.astype(float), 1)
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Float64")
     tm.assert_extension_array_equal(result, expected)
 
     result = ufunc(1, a)
-    expected = pd.array(ufunc(1, a.astype(float)), dtype="Float64")
+    np_res = ufunc(1, a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Float64")
     tm.assert_extension_array_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py
index e954cecba417a..fc9e260923d32 100644
--- a/pandas/tests/arrays/floating/test_to_numpy.py
+++ b/pandas/tests/arrays/floating/test_to_numpy.py
@@ -7,18 +7,23 @@
 
 
 @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
-def test_to_numpy(box):
+def test_to_numpy(box, using_nan_is_na):
     con = pd.Series if box else pd.array
 
     # default (with or without missing values) -> object dtype
     arr = con([0.1, 0.2, 0.3], dtype="Float64")
     result = arr.to_numpy()
     expected = np.array([0.1, 0.2, 0.3], dtype="float64")
+    # TODO: should this be object with `not using_nan_is_na` to avoid
+    #  values-dependent behavior?
     tm.assert_numpy_array_equal(result, expected)
 
     arr = con([0.1, 0.2, None], dtype="Float64")
     result = arr.to_numpy()
-    expected = np.array([0.1, 0.2, np.nan], dtype="float64")
+    if using_nan_is_na:
+        expected = np.array([0.1, 0.2, np.nan], dtype="float64")
+    else:
+        expected = np.array([0.1, 0.2, pd.NA], dtype=object)
     tm.assert_numpy_array_equal(result, expected)
 
 
@@ -81,11 +86,18 @@ def test_to_numpy_na_value(box):
     tm.assert_numpy_array_equal(result, expected)
 
 
-def test_to_numpy_na_value_with_nan():
+def test_to_numpy_na_value_with_nan(using_nan_is_na):
     # array with both NaN and NA -> only fill NA with `na_value`
-    arr = FloatingArray(np.array([0.0, np.nan, 0.0]), np.array([False, False, True]))
+    mask = np.array([False, False, True])
+    if using_nan_is_na:
+        mask[1] = True
+    arr = FloatingArray(np.array([0.0, np.nan, 0.0]), mask)
     result = arr.to_numpy(dtype="float64", na_value=-1)
-    expected = np.array([0.0, np.nan, -1.0], dtype="float64")
+    if using_nan_is_na:
+        # the NaN entry is masked above, so it is filled with na_value as well
+        expected = np.array([0.0, -1.0, -1.0], dtype="float64")
+    else:
+        expected = np.array([0.0, np.nan, -1.0], dtype="float64")
     tm.assert_numpy_array_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py
index aeceb9b8a3cb1..e16ab6f23b417 100644
--- a/pandas/tests/arrays/integer/test_arithmetic.py
+++ b/pandas/tests/arrays/integer/test_arithmetic.py
@@ -52,13 +52,16 @@ def test_div(dtype):
 
 
 @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
-def test_divide_by_zero(zero, negative):
+def test_divide_by_zero(zero, negative, using_nan_is_na):
     # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
     a = pd.array([0, 1, -1, None], dtype="Int64")
     result = a / zero
+    exp_mask = np.array([False, False, False, True])
+    if using_nan_is_na:
+        exp_mask[0] = True
     expected = FloatingArray(
         np.array([np.nan, np.inf, -np.inf, 1], dtype="float64"),
-        np.array([False, False, False, True]),
+        exp_mask,
     )
     if negative:
         expected *= -1
@@ -99,7 +102,7 @@ def test_mod(dtype):
     tm.assert_extension_array_equal(result, expected)
 
 
-def test_pow_scalar():
+def test_pow_scalar(using_nan_is_na):
     a = pd.array([-1, 0, 1, None, 2], dtype="Int64")
     result = a**0
     expected = pd.array([1, 1, 1, 1, 1], dtype="Int64")
@@ -114,10 +117,13 @@ def test_pow_scalar():
     tm.assert_extension_array_equal(result, expected)
 
     result = a**np.nan
-    expected = FloatingArray(
-        np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
-        np.array([False, False, False, True, False]),
-    )
+    if using_nan_is_na:
+        expected = expected.astype("Float64")
+    else:
+        expected = FloatingArray(
+            np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
+            np.array([False, False, False, True, False]),
+        )
     tm.assert_extension_array_equal(result, expected)
 
     # reversed
@@ -136,10 +142,13 @@ def test_pow_scalar():
     tm.assert_extension_array_equal(result, expected)
 
     result = np.nan**a
-    expected = FloatingArray(
-        np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
-        np.array([False, False, True, False]),
-    )
+    if using_nan_is_na:
+        expected = expected.astype("Float64")
+    else:
+        expected = FloatingArray(
+            np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
+            np.array([False, False, True, False]),
+        )
     tm.assert_extension_array_equal(result, expected)
 
 
@@ -212,7 +221,7 @@ def test_error_invalid_values(data, all_arithmetic_operators):
 # TODO test unsigned overflow
 
 
-def test_arith_coerce_scalar(data, all_arithmetic_operators):
+def test_arith_coerce_scalar(data, all_arithmetic_operators, using_nan_is_na):
     op = tm.get_op_from_name(all_arithmetic_operators)
     s = pd.Series(data)
     other = 0.01
@@ -220,9 +229,11 @@ def test_arith_coerce_scalar(data, all_arithmetic_operators):
     result = op(s, other)
     expected = op(s.astype(float), other)
     expected = expected.astype("Float64")
+    if not using_nan_is_na:
+        expected[s.isna()] = pd.NA
 
     # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
-    if all_arithmetic_operators == "__rmod__":
+    if all_arithmetic_operators == "__rmod__" and not using_nan_is_na:
         mask = (s == 0).fillna(False).to_numpy(bool)
         expected.array._mask[mask] = False
 
diff --git a/pandas/tests/arrays/integer/test_construction.py b/pandas/tests/arrays/integer/test_construction.py
index 8eaa9ace027c9..ab52fbec45f79 100644
--- a/pandas/tests/arrays/integer/test_construction.py
+++ b/pandas/tests/arrays/integer/test_construction.py
@@ -26,14 +26,20 @@ def test_uses_pandas_na():
     assert a[1] is pd.NA
 
 
-def test_from_dtype_from_float(data):
+def test_from_dtype_from_float(data, using_nan_is_na):
     # construct from our dtype & string dtype
     dtype = data.dtype
 
     # from float
     expected = pd.Series(data)
-    result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype))
-    tm.assert_series_equal(result, expected)
+    arr = data.to_numpy(na_value=np.nan, dtype="float")
+    if using_nan_is_na:
+        result = pd.Series(arr, dtype=str(dtype))
+        tm.assert_series_equal(result, expected)
+    else:
+        msg = "Cannot cast NaN value to Integer dtype"
+        with pytest.raises(ValueError, match=msg):
+            pd.Series(arr, dtype=str(dtype))
 
     # from int / list
     expected = pd.Series(data)
@@ -116,10 +122,15 @@ def test_integer_array_constructor_copy():
         ([np.nan, np.nan], [np.nan, np.nan]),
     ],
 )
-def test_to_integer_array_none_is_nan(a, b):
-    result = pd.array(a, dtype="Int64")
-    expected = pd.array(b, dtype="Int64")
-    tm.assert_extension_array_equal(result, expected)
+def test_to_integer_array_none_is_nan(a, b, using_nan_is_na):
+    if using_nan_is_na:
+        result = pd.array(a, dtype="Int64")
+        expected = pd.array(b, dtype="Int64")
+        tm.assert_extension_array_equal(result, expected)
+    else:
+        msg = "Cannot cast NaN value to Integer dtype"
+        with pytest.raises(ValueError, match=msg):
+            pd.array(b, dtype="Int64")
 
 
 @pytest.mark.parametrize(
@@ -139,6 +150,7 @@ def test_to_integer_array_error(values):
     # error in converting existing arrays to IntegerArrays
     msg = "|".join(
         [
+            "cannot convert float NaN to integer",  # when using_nan_is_na is False
             r"cannot be converted to IntegerDtype",
             r"invalid literal for int\(\) with base 10:",
             r"values must be a 1D list-like",
@@ -214,8 +226,16 @@ def test_to_integer_array_str():
     ],
 )
 def test_to_integer_array_bool(
-    constructor, bool_values, int_values, target_dtype, expected_dtype
+    constructor, bool_values, int_values, target_dtype, expected_dtype, using_nan_is_na
 ):
+    if not using_nan_is_na and np.isnan(bool_values[-1]):
+        msg = "Cannot cast NaN value to Integer dtype"
+        with pytest.raises(ValueError, match=msg):
+            constructor(bool_values, dtype=target_dtype)
+        with pytest.raises(ValueError, match=msg):
+            pd.array(int_values, dtype=target_dtype)
+        return
+
     result = constructor(bool_values, dtype=target_dtype)
     assert result.dtype == expected_dtype
     expected = pd.array(int_values, dtype=target_dtype)
@@ -230,8 +250,16 @@ def test_to_integer_array_bool(
         (np.array([1, np.nan]), "int8", Int8Dtype),
     ],
 )
-def test_to_integer_array(values, to_dtype, result_dtype):
+def test_to_integer_array(values, to_dtype, result_dtype, using_nan_is_na):
     # convert existing arrays to IntegerArrays
+    if not using_nan_is_na and np.isnan(values[-1]):
+        msg = "Cannot cast NaN value to Integer dtype"
+        with pytest.raises(ValueError, match=msg):
+            IntegerArray._from_sequence(values, dtype=to_dtype)
+        with pytest.raises(ValueError, match=msg):
+            pd.array(values, dtype=result_dtype())
+        return
+
     result = IntegerArray._from_sequence(values, dtype=to_dtype)
     assert result.dtype == result_dtype()
     expected = pd.array(values, dtype=result_dtype())
diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py
index 77a0dd12534cc..892a7a2be7b5c 100644
--- a/pandas/tests/arrays/integer/test_function.py
+++ b/pandas/tests/arrays/integer/test_function.py
@@ -9,24 +9,33 @@
 @pytest.mark.parametrize("ufunc", [np.abs, np.sign])
 # np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
 @pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
-def test_ufuncs_single_int(ufunc):
+def test_ufuncs_single_int(ufunc, using_nan_is_na):
     a = pd.array([1, 2, -3, pd.NA], dtype="Int64")
     result = ufunc(a)
-    expected = pd.array(ufunc(a.astype(float)), dtype="Int64")
+    np_res = ufunc(a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[-1] = pd.NA
+    expected = pd.array(np_res, dtype="Int64")
     tm.assert_extension_array_equal(result, expected)
 
     s = pd.Series(a)
     result = ufunc(s)
-    expected = pd.Series(pd.array(ufunc(a.astype(float)), dtype="Int64"))
+    np_res = ufunc(a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[-1] = pd.NA
+    expected = pd.Series(pd.array(np_res, dtype="Int64"))
     tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
-def test_ufuncs_single_float(ufunc):
+def test_ufuncs_single_float(ufunc, using_nan_is_na):
     a = pd.array([1, 2, -3, pd.NA], dtype="Int64")
     with np.errstate(invalid="ignore"):
         result = ufunc(a)
-        expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask)
+        if using_nan_is_na:
+            expected = pd.array(ufunc(a.astype(float)), dtype="Float64")
+        else:
+            expected = FloatingArray(ufunc(a.astype(float)), mask=a._mask)
     tm.assert_extension_array_equal(result, expected)
 
     s = pd.Series(a)
@@ -41,34 +50,56 @@ def test_ufuncs_binary_int(ufunc):
     # two IntegerArrays
     a = pd.array([1, 2, -3, pd.NA], dtype="Int64")
     result = ufunc(a, a)
-    expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Int64")
+    np_res = ufunc(a.astype(float), a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Int64")
     tm.assert_extension_array_equal(result, expected)
 
     # IntegerArray with numpy array
     arr = np.array([1, 2, 3, 4])
     result = ufunc(a, arr)
-    expected = pd.array(ufunc(a.astype(float), arr), dtype="Int64")
+    np_res = ufunc(a.astype(float), arr)
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Int64")
     tm.assert_extension_array_equal(result, expected)
 
     result = ufunc(arr, a)
-    expected = pd.array(ufunc(arr, a.astype(float)), dtype="Int64")
+    np_res = ufunc(arr, a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Int64")
     tm.assert_extension_array_equal(result, expected)
 
     # IntegerArray with scalar
     result = ufunc(a, 1)
-    expected = pd.array(ufunc(a.astype(float), 1), dtype="Int64")
+    np_res = ufunc(a.astype(float), 1)
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Int64")
     tm.assert_extension_array_equal(result, expected)
 
     result = ufunc(1, a)
-    expected = pd.array(ufunc(1, a.astype(float)), dtype="Int64")
+    np_res = ufunc(1, a.astype(float))
+    np_res = np_res.astype(object)
+    np_res[a.isna()] = pd.NA
+    expected = pd.array(np_res, dtype="Int64")
     tm.assert_extension_array_equal(result, expected)
 
 
-def test_ufunc_binary_output():
-    a = pd.array([1, 2, np.nan])
+def test_ufunc_binary_output(using_nan_is_na):
+    a = pd.array([1, 2, pd.NA], dtype="Int64")
     result = np.modf(a)
-    expected = np.modf(a.to_numpy(na_value=np.nan, dtype="float"))
-    expected = (pd.array(expected[0]), pd.array(expected[1]))
+    np_res = np.modf(a.to_numpy(na_value=np.nan, dtype="float"))
+
+    np_res = list(np_res)
+    np_res[0] = np_res[0].astype(object)
+    np_res[1] = np_res[1].astype(object)
+    np_res[0][-1] = pd.NA
+    np_res[1][-1] = pd.NA
+
+    expected = (pd.array(np_res[0]), pd.array(np_res[1]))
 
     assert isinstance(result, tuple)
     assert len(result) == 2
diff --git a/pandas/tests/arrays/integer/test_reduction.py b/pandas/tests/arrays/integer/test_reduction.py
index 1c91cd25ba69c..f456d06a49fe5 100644
--- a/pandas/tests/arrays/integer/test_reduction.py
+++ b/pandas/tests/arrays/integer/test_reduction.py
@@ -96,8 +96,8 @@ def test_groupby_reductions(op, expected):
         ["median", Series([2, 2], index=["B", "C"], dtype="Float64")],
         ["var", Series([2, 2], index=["B", "C"], dtype="Float64")],
         ["std", Series([2**0.5, 2**0.5], index=["B", "C"], dtype="Float64")],
-        ["skew", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")],
-        ["kurt", Series([pd.NA, pd.NA], index=["B", "C"], dtype="Float64")],
+        ["skew", Series([np.nan, pd.NA], index=["B", "C"], dtype="Float64")],
+        ["kurt", Series([np.nan, pd.NA], index=["B", "C"], dtype="Float64")],
         ["any", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
         ["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
     ],
diff --git a/pandas/tests/arrays/interval/test_interval_pyarrow.py b/pandas/tests/arrays/interval/test_interval_pyarrow.py
index ef8701be81e2b..c8692bb98f346 100644
--- a/pandas/tests/arrays/interval/test_interval_pyarrow.py
+++ b/pandas/tests/arrays/interval/test_interval_pyarrow.py
@@ -51,7 +51,7 @@ def test_arrow_array():
         pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left"))
 
 
-def test_arrow_array_missing():
+def test_arrow_array_missing(using_nan_is_na):
     pa = pytest.importorskip("pyarrow")
 
     from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
diff --git a/pandas/tests/arrays/masked/test_function.py b/pandas/tests/arrays/masked/test_function.py
index b4b1761217826..38a9488e5707d 100644
--- a/pandas/tests/arrays/masked/test_function.py
+++ b/pandas/tests/arrays/masked/test_function.py
@@ -38,17 +38,18 @@ def numpy_dtype(data):
 def test_round(data, numpy_dtype):
     # No arguments
     result = data.round()
-    expected = pd.array(
-        np.round(data.to_numpy(dtype=numpy_dtype, na_value=None)), dtype=data.dtype
-    )
+    np_result = np.round(data.to_numpy(dtype=numpy_dtype, na_value=None))
+    exp_np = np_result.astype(object)
+    exp_np[data.isna()] = pd.NA
+    expected = pd.array(exp_np, dtype=data.dtype)
     tm.assert_extension_array_equal(result, expected)
 
     # Decimals argument
     result = data.round(decimals=2)
-    expected = pd.array(
-        np.round(data.to_numpy(dtype=numpy_dtype, na_value=None), decimals=2),
-        dtype=data.dtype,
-    )
+    np_result = np.round(data.to_numpy(dtype=numpy_dtype, na_value=None), decimals=2)
+    exp_np = np_result.astype(object)
+    exp_np[data.isna()] = pd.NA
+    expected = pd.array(exp_np, dtype=data.dtype)
     tm.assert_extension_array_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py
index 96e1cc05e284c..06a910aa06108 100644
--- a/pandas/tests/arrays/string_/test_string.py
+++ b/pandas/tests/arrays/string_/test_string.py
@@ -527,7 +527,8 @@ def test_astype_float(dtype, any_float_dtype):
     # Don't compare arrays (37974)
     ser = pd.Series(["1.1", pd.NA, "3.3"], dtype=dtype)
     result = ser.astype(any_float_dtype)
-    expected = pd.Series([1.1, np.nan, 3.3], dtype=any_float_dtype)
+    item = np.nan if isinstance(result.dtype, np.dtype) else pd.NA
+    expected = pd.Series([1.1, item, 3.3], dtype=any_float_dtype)
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
index 821f51ee95ad3..cdf3b549bddee 100644
--- a/pandas/tests/base/test_conversion.py
+++ b/pandas/tests/base/test_conversion.py
@@ -355,7 +355,10 @@ def test_array_multiindex_raises():
         ),
     ],
 )
-def test_to_numpy(arr, expected, zero_copy, index_or_series_or_array):
+def test_to_numpy(arr, expected, zero_copy, index_or_series_or_array, using_nan_is_na):
+    if not using_nan_is_na and arr[-1] is pd.NA:
+        expected = np.array([0, pd.NA], dtype=object)
+
     box = index_or_series_or_array
 
     with tm.assert_produces_warning(None):
diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py
index 7f094db6ea524..6e55531bbce8f 100644
--- a/pandas/tests/base/test_unique.py
+++ b/pandas/tests/base/test_unique.py
@@ -30,7 +30,7 @@ def test_unique(index_or_series_obj):
 
 @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
 @pytest.mark.parametrize("null_obj", [np.nan, None])
-def test_unique_null(null_obj, index_or_series_obj):
+def test_unique_null(null_obj, index_or_series_obj, using_nan_is_na):
     obj = index_or_series_obj
 
     if not allow_na_ops(obj):
@@ -39,6 +39,12 @@ def test_unique_null(null_obj, index_or_series_obj):
         pytest.skip("Test doesn't make sense on empty data")
     elif isinstance(obj, pd.MultiIndex):
         pytest.skip(f"MultiIndex can't hold '{null_obj}'")
+    elif (
+        null_obj is not None
+        and not using_nan_is_na
+        and obj.dtype in ["Int64", "UInt16", "Float32"]
+    ):
+        pytest.skip("NaN is not a valid NA for this dtype.")
 
     values = obj._values
     values[0:2] = null_obj
diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py
index 79eb64b5a654f..3e4075911f735 100644
--- a/pandas/tests/extension/base/interface.py
+++ b/pandas/tests/extension/base/interface.py
@@ -31,7 +31,7 @@ def test_can_hold_na_valid(self, data):
         # GH-20761
         assert data._can_hold_na is True
 
-    def test_contains(self, data, data_missing):
+    def test_contains(self, data, data_missing, using_nan_is_na):
         # GH-37867
         # Tests for membership checks. Membership checks for nan-likes is tricky and
         # the settled on rule is: `nan_like in arr` is True if nan_like is
@@ -55,7 +55,21 @@ def test_contains(self, data, data_missing):
                 # type check for e.g. two instances of Decimal("NAN")
                 continue
             assert na_value_obj not in data
-            assert na_value_obj not in data_missing
+            if (
+                using_nan_is_na
+                and isinstance(na_value_obj, float)
+                and isinstance(
+                    data,
+                    (
+                        pd.core.arrays.BaseMaskedArray,
+                        pd.core.arrays.ArrowExtensionArray,
+                    ),
+                )
+            ):
+                # TODO: wrong place for this override; move to the masked/arrow tests
+                assert na_value_obj in data_missing
+            else:
+                assert na_value_obj not in data_missing
 
     def test_memory_usage(self, data):
         s = pd.Series(data)
diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py
index 0e9ffce07bf98..07e1c1d96a196 100644
--- a/pandas/tests/extension/test_masked.py
+++ b/pandas/tests/extension/test_masked.py
@@ -176,20 +176,23 @@ def skip_if_doesnt_support_2d(self, dtype, request):
         #  override becomes unnecessary.
 
     @pytest.mark.parametrize("na_action", [None, "ignore"])
-    def test_map(self, data_missing, na_action):
+    def test_map(self, data_missing, na_action, using_nan_is_na):
         result = data_missing.map(lambda x: x, na_action=na_action)
-        if data_missing.dtype == Float32Dtype():
+        if data_missing.dtype == Float32Dtype() and using_nan_is_na:
             # map roundtrips through objects, which converts to float64
             expected = data_missing.to_numpy(dtype="float64", na_value=np.nan)
         else:
             expected = data_missing.to_numpy()
         tm.assert_numpy_array_equal(result, expected)
 
-    def test_map_na_action_ignore(self, data_missing_for_sorting):
+    def test_map_na_action_ignore(self, data_missing_for_sorting, using_nan_is_na):
         zero = data_missing_for_sorting[2]
         result = data_missing_for_sorting.map(lambda x: zero, na_action="ignore")
         if data_missing_for_sorting.dtype.kind == "b":
             expected = np.array([False, pd.NA, False], dtype=object)
+        elif not using_nan_is_na:
+            # TODO: would we prefer NaN here so the result is not object dtype?
+            expected = np.array([zero, pd.NA, zero], dtype=object)
         else:
             expected = np.array([zero, np.nan, zero])
         tm.assert_numpy_array_equal(result, expected)
@@ -220,8 +223,7 @@ def _cast_pointwise_result(self, op_name: str, obj, other, pointwise_result):
 
         if sdtype.kind in "iu":
             if op_name in ("__rtruediv__", "__truediv__", "__div__"):
-                filled = expected.fillna(np.nan)
-                expected = filled.astype("Float64")
+                expected = expected.astype("Float64")
             else:
                 # combine method result in 'biggest' (int64) dtype
                 expected = expected.astype(sdtype)
@@ -392,7 +394,9 @@ def check_accumulate(self, ser: pd.Series, op_name: str, skipna: bool):
         expected = pd.Series(
             pd.array(
                 getattr(ser.astype("float64"), op_name)(skipna=skipna),
-                dtype=expected_dtype,
+                dtype="Float64",
             )
         )
+        expected[np.isnan(expected)] = pd.NA
+        expected = expected.astype(expected_dtype)
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index c428bd1820cb1..d25cb2d4b8e6e 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -365,12 +365,22 @@ def test_astype_extension_dtypes_1d(self, any_int_ea_dtype):
         tm.assert_frame_equal(df.astype("int64").astype(dtype), expected1)
 
     @pytest.mark.parametrize("dtype", ["category", "Int64"])
-    def test_astype_extension_dtypes_duplicate_col(self, dtype):
+    def test_astype_extension_dtypes_duplicate_col(self, dtype, using_nan_is_na):
         # GH#24704
         a1 = Series([0, np.nan, 4], name="a")
         a2 = Series([np.nan, 3, 5], name="a")
         df = concat([a1, a2], axis=1)
 
+        if dtype == "Int64" and not using_nan_is_na:
+            msg = "Cannot cast NaN value to Integer dtype"
+            with pytest.raises(ValueError, match=msg):
+                df.astype(dtype)
+            with pytest.raises(ValueError, match=msg):
+                a1.astype(dtype)
+            with pytest.raises(ValueError, match=msg):
+                a2.astype(dtype)
+            return
+
         result = df.astype(dtype)
         expected = concat([a1.astype(dtype), a2.astype(dtype)], axis=1)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
index 9e302dc5f94ee..41f72d17ebef7 100644
--- a/pandas/tests/frame/methods/test_replace.py
+++ b/pandas/tests/frame/methods/test_replace.py
@@ -643,7 +643,7 @@ def test_replace_mixed3(self):
 
     def test_replace_nullable_int_with_string_doesnt_cast(self):
         # GH#25438 don't cast df['a'] to float64
-        df = DataFrame({"a": [1, 2, 3, np.nan], "b": ["some", "strings", "here", "he"]})
+        df = DataFrame({"a": [1, 2, 3, pd.NA], "b": ["some", "strings", "here", "he"]})
         df["a"] = df["a"].astype("Int64")
 
         res = df.replace("", np.nan)
@@ -681,7 +681,7 @@ def test_replace_simple_nested_dict_with_nonexistent_value(self):
 
     def test_replace_NA_with_None(self):
         # gh-45601
-        df = DataFrame({"value": [42, None]}).astype({"value": "Int64"})
+        df = DataFrame({"value": [42, pd.NA]}, dtype="Int64")
         result = df.replace({pd.NA: None})
         expected = DataFrame({"value": [42, None]}, dtype=object)
         tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py
index cc23c292b66dc..5aacd2df11873 100644
--- a/pandas/tests/frame/test_reductions.py
+++ b/pandas/tests/frame/test_reductions.py
@@ -2121,7 +2121,9 @@ def test_fails_on_non_numeric(kernel):
     ],
 )
 @pytest.mark.parametrize("min_count", [0, 2])
-def test_numeric_ea_axis_1(method, skipna, min_count, any_numeric_ea_dtype):
+def test_numeric_ea_axis_1(
+    method, skipna, min_count, any_numeric_ea_dtype, using_nan_is_na
+):
     # GH 54341
     df = DataFrame(
         {
@@ -2170,5 +2172,11 @@ def test_numeric_ea_axis_1(method, skipna, min_count, any_numeric_ea_dtype):
     result = getattr(df, method)(axis=1, **kwargs)
     expected = getattr(expected_df, method)(axis=1, **kwargs)
     if method not in ("idxmax", "idxmin"):
-        expected = expected.astype(expected_dtype)
+        if using_nan_is_na:
+            expected = expected.astype(expected_dtype)
+        else:
+            mask = np.isnan(expected)
+            expected[mask] = 0
+            expected = expected.astype(expected_dtype)
+            expected[mask] = pd.NA
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py
index 28cb25b515ed2..815513fe96009 100644
--- a/pandas/tests/groupby/methods/test_quantile.py
+++ b/pandas/tests/groupby/methods/test_quantile.py
@@ -255,7 +255,9 @@ def test_groupby_quantile_raises_on_invalid_dtype(q, numeric_only):
 
 def test_groupby_quantile_NA_float(any_float_dtype):
     # GH#42849
-    df = DataFrame({"x": [1, 1], "y": [0.2, np.nan]}, dtype=any_float_dtype)
+    dtype = pd.Series([], dtype=any_float_dtype).dtype
+    item = np.nan if isinstance(dtype, np.dtype) else pd.NA
+    df = DataFrame({"x": [1, 1], "y": [0.2, item]}, dtype=any_float_dtype)
     result = df.groupby("x")["y"].quantile(0.5)
     exp_index = Index([1.0], dtype=any_float_dtype, name="x")
 
@@ -353,7 +355,7 @@ def test_groupby_quantile_allNA_column(dtype):
     df = DataFrame({"x": [1, 1], "y": [pd.NA] * 2}, dtype=dtype)
     result = df.groupby("x")["y"].quantile(0.5)
     expected = pd.Series(
-        [np.nan], dtype=dtype, index=Index([1.0], dtype=dtype), name="y"
+        [pd.NA], dtype=dtype, index=Index([1.0], dtype=dtype), name="y"
     )
     expected.index.name = "x"
     tm.assert_series_equal(expected, result)
diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py
index e60e7d6bc05d4..977d98f81e0f3 100644
--- a/pandas/tests/groupby/test_reductions.py
+++ b/pandas/tests/groupby/test_reductions.py
@@ -186,9 +186,10 @@ def test_masked_kleene_logic(all_boolean_reductions, skipna, data):
 )
 def test_masked_mixed_types(dtype1, dtype2, exp_col1, exp_col2):
     # GH#37506
-    data = [1.0, np.nan]
+    data1 = [1.0, np.nan] if dtype1.startswith("f") else [1.0, pd.NA]
+    data2 = [1.0, np.nan] if dtype2.startswith("f") else [1.0, pd.NA]
     df = DataFrame(
-        {"col1": pd.array(data, dtype=dtype1), "col2": pd.array(data, dtype=dtype2)}
+        {"col1": pd.array(data1, dtype=dtype1), "col2": pd.array(data2, dtype=dtype2)}
     )
     result = df.groupby([1, 1]).agg("all", skipna=False)
 
diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py
index cf5fc2977a28f..c134e44681122 100644
--- a/pandas/tests/indexes/multi/test_constructors.py
+++ b/pandas/tests/indexes/multi/test_constructors.py
@@ -671,7 +671,7 @@ def test_from_frame_missing_values_multiIndex():
     multi_indexed = MultiIndex.from_frame(df)
     expected = MultiIndex.from_arrays(
         [
-            Series([1, 2, None]).astype("Int64"),
+            Series([1, 2, None], dtype="Int64"),
             pd.Float64Dtype().__from_arrow__(pa.array([0.2, None, None])),
         ],
         names=["a", "b"],
diff --git a/pandas/tests/indexes/numeric/test_indexing.py b/pandas/tests/indexes/numeric/test_indexing.py
index b29f783203177..2f37b15ca74f5 100644
--- a/pandas/tests/indexes/numeric/test_indexing.py
+++ b/pandas/tests/indexes/numeric/test_indexing.py
@@ -339,35 +339,50 @@ def test_get_loc_masked_na(self, any_numeric_ea_and_arrow_dtype):
         with pytest.raises(KeyError, match="NA"):
             idx.get_loc(NA)
 
-    def test_get_loc_masked_na_and_nan(self):
+    def test_get_loc_masked_na_and_nan(self, using_nan_is_na):
         # GH#39133
-        idx = Index(
-            FloatingArray(
-                np.array([1, 2, 1, np.nan]), mask=np.array([False, False, True, False])
-            )
-        )
-        result = idx.get_loc(NA)
-        assert result == 2
-        result = idx.get_loc(np.nan)
-        assert result == 3
+        mask = np.array([False, False, True, False])
+        if using_nan_is_na:
+            mask[-1] = True
+
+        idx = Index(FloatingArray(np.array([1, 2, 1, np.nan]), mask=mask))
+        if using_nan_is_na:
+            # NaN and NA are consistently treated as the same
+            result = idx.get_loc(NA)
+            expected = np.array([False, False, True, True])
+            tm.assert_numpy_array_equal(result, expected)
+            result = idx.get_loc(np.nan)
+            tm.assert_numpy_array_equal(result, expected)
+        else:
+            result = idx.get_loc(NA)
+            assert result == 2
+            result = idx.get_loc(np.nan)
+            assert result == 3
 
         idx = Index(
             FloatingArray(np.array([1, 2, 1.0]), mask=np.array([False, False, True]))
         )
         result = idx.get_loc(NA)
         assert result == 2
-        with pytest.raises(KeyError, match="nan"):
-            idx.get_loc(np.nan)
+        if using_nan_is_na:
+            result = idx.get_loc(np.nan)
+            assert result == 2
+        else:
+            with pytest.raises(KeyError, match="nan"):
+                idx.get_loc(np.nan)
 
-        idx = Index(
-            FloatingArray(
-                np.array([1, 2, np.nan]), mask=np.array([False, False, False])
-            )
-        )
+        mask = np.array([False, False, False])
+        if using_nan_is_na:
+            mask[-1] = True
+        idx = Index(FloatingArray(np.array([1, 2, np.nan]), mask=mask))
         result = idx.get_loc(np.nan)
         assert result == 2
-        with pytest.raises(KeyError, match="NA"):
-            idx.get_loc(NA)
+        if using_nan_is_na:
+            result = idx.get_loc(NA)
+            assert result == 2
+        else:
+            with pytest.raises(KeyError, match="NA"):
+                idx.get_loc(NA)
 
     @pytest.mark.parametrize("val", [4, 2])
     def test_get_indexer_masked_na(self, any_numeric_ea_and_arrow_dtype, val):
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index c04ea129590bc..5414389f52fc5 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -10,6 +10,7 @@
 from pandas.errors import IndexingError
 
 from pandas import (
+    NA,
     Categorical,
     CategoricalDtype,
     DataFrame,
@@ -794,9 +795,9 @@ def test_iloc_mask(self):
                     idx is None or (idx == "index" and method != ".iloc")
                 ) and "0b" in expected_result:
                     # For successful numeric results, exact match is needed
-                    assert expected_result == answer, (
-                        f"[{key}] does not match [{answer}]"
-                    )
+                    assert (
+                        expected_result == answer
+                    ), f"[{key}] does not match [{answer}]"
                 else:
                     # For error messages, substring match is sufficient
                     assert expected_result in answer, f"[{key}] not found in [{answer}]"
@@ -1480,8 +1481,10 @@ def test_iloc_setitem_pure_position_based(self):
     def test_iloc_nullable_int64_size_1_nan(self):
         # GH 31861
         result = DataFrame({"a": ["test"], "b": [np.nan]})
+
+        ser = Series([NA], name="b", dtype="Int64")
         with pytest.raises(TypeError, match="Invalid value"):
-            result.loc[:, "b"] = result.loc[:, "b"].astype("Int64")
+            result.loc[:, "b"] = ser
 
     def test_iloc_arrow_extension_array(self):
         # GH#61311
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
index ccb58aae2783f..3aa2eb2e42f91 100644
--- a/pandas/tests/indexing/test_loc.py
+++ b/pandas/tests/indexing/test_loc.py
@@ -2929,7 +2929,7 @@ def test_loc_getitem_multiindex_tuple_level():
 def test_loc_getitem_nullable_index_with_duplicates():
     # GH#34497
     df = DataFrame(
-        data=np.array([[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, np.nan, np.nan]]).T,
+        data=np.array([[1, 2, 3, 4], [5, 6, 7, 8], [1, 2, pd.NA, pd.NA]]).T,
         columns=["a", "b", "c"],
         dtype="Int64",
     )
diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py
index 63332fe4658e5..10335ff716c1f 100644
--- a/pandas/tests/reshape/test_cut.py
+++ b/pandas/tests/reshape/test_cut.py
@@ -656,8 +656,10 @@ def test_cut_incorrect_labels(labels):
 def test_cut_nullable_integer(bins, right, include_lowest):
     a = np.random.default_rng(2).integers(0, 10, size=50).astype(float)
     a[::2] = np.nan
+    b = a.astype(object)
+    b[::2] = pd.NA
     result = cut(
-        pd.array(a, dtype="Int64"), bins, right=right, include_lowest=include_lowest
+        pd.array(b, dtype="Int64"), bins, right=right, include_lowest=include_lowest
     )
     expected = cut(a, bins, right=right, include_lowest=include_lowest)
     tm.assert_categorical_equal(result, expected)
diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py
index 2c441a6ed91c1..3e8c13685aca1 100644
--- a/pandas/tests/series/accessors/test_dt_accessor.py
+++ b/pandas/tests/series/accessors/test_dt_accessor.py
@@ -732,9 +732,9 @@ def test_dt_timetz_accessor(self, tz_naive_fixture):
         "input_series, expected_output",
         [
             [["2020-01-01"], [[2020, 1, 3]]],
-            [[pd.NaT], [[np.nan, np.nan, np.nan]]],
+            [[pd.NaT], [[None, None, None]]],
             [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]],
-            [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.nan, np.nan, np.nan]]],
+            [["2010-01-01", pd.NaT], [[2009, 53, 5], [None, None, None]]],
             # see GH#36032
             [["2016-01-08", "2016-01-04"], [[2016, 1, 5], [2016, 1, 1]]],
             [["2016-01-07", "2016-01-01"], [[2016, 1, 4], [2015, 53, 5]]],
diff --git a/pandas/tests/series/methods/test_case_when.py b/pandas/tests/series/methods/test_case_when.py
index acfc58bea728e..7cb60a11644a3 100644
--- a/pandas/tests/series/methods/test_case_when.py
+++ b/pandas/tests/series/methods/test_case_when.py
@@ -2,7 +2,6 @@
 import pytest
 
 from pandas import (
-    NA,
     DataFrame,
     Series,
     array as pd_array,
@@ -100,7 +99,7 @@ def test_case_when_multiple_conditions_replacement_extension_dtype(df):
             (df["a"].gt(1) & df["b"].eq(5), pd_array([1, 2, 3], dtype="Int64")),
         ],
     )
-    expected = Series([1, 2, NA], dtype="Float64")
+    expected = Series([1, 2, np.nan], dtype="Float64")
     tm.assert_series_equal(result, expected)
 
 
diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py
index 8ed422fc118dc..c1ee7f8c9e008 100644
--- a/pandas/tests/series/methods/test_clip.py
+++ b/pandas/tests/series/methods/test_clip.py
@@ -43,21 +43,16 @@ def test_clip_types_and_nulls(self):
             assert list(isna(s)) == list(isna(lower))
             assert list(isna(s)) == list(isna(upper))
 
-    def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture):
+    def test_series_clipping_with_na_values(self, any_numeric_ea_dtype):
         # Ensure that clipping method can handle NA values with out failing
         # GH#40581
 
-        if nulls_fixture is pd.NaT:
-            # constructor will raise, see
-            #  test_constructor_mismatched_null_nullable_dtype
-            pytest.skip("See test_constructor_mismatched_null_nullable_dtype")
-
-        ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
+        ser = Series([pd.NA, 1.0, 3.0], dtype=any_numeric_ea_dtype)
         s_clipped_upper = ser.clip(upper=2.0)
         s_clipped_lower = ser.clip(lower=2.0)
 
-        expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype)
-        expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype)
+        expected_upper = Series([pd.NA, 1.0, 2.0], dtype=any_numeric_ea_dtype)
+        expected_lower = Series([pd.NA, 2.0, 3.0], dtype=any_numeric_ea_dtype)
 
         tm.assert_series_equal(s_clipped_upper, expected_upper)
         tm.assert_series_equal(s_clipped_lower, expected_lower)
diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py
index 324e03894e92c..e36baba5e0108 100644
--- a/pandas/tests/series/methods/test_convert_dtypes.py
+++ b/pandas/tests/series/methods/test_convert_dtypes.py
@@ -182,6 +182,7 @@ def test_convert_dtypes(
         expected_other,
         params,
         using_infer_string,
+        using_nan_is_na,
     ):
         if (
             hasattr(data, "dtype")
@@ -224,6 +225,16 @@ def test_convert_dtypes(
             # If convert_string=False and infer_objects=True, we end up with the
             # default string dtype instead of preserving object for string data
             expected_dtype = pd.StringDtype(na_value=np.nan)
+        if (
+            not using_nan_is_na
+            and expected_dtype == "Int64"
+            and isinstance(data[1], float)
+            and np.isnan(data[1])
+        ):
+            if params_dict["convert_floating"]:
+                expected_dtype = "Float64"
+            else:
+                expected_dtype = "float64"
 
         expected = pd.Series(data, dtype=expected_dtype)
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index 7d96f7f862fce..55ee660d09067 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -279,7 +279,10 @@ def test_rank_tie_methods(
         method, exp = results
         if (
             dtype == "int64"
-            or (dtype in ["int64[pyarrow]", "uint64[pyarrow]"] and not using_nan_is_na)
+            or (
+                dtype in ["int64[pyarrow]", "uint64[pyarrow]", "Int64"]
+                and not using_nan_is_na
+            )
             or (not using_infer_string and dtype == "str")
         ):
             pytest.skip("int64/str does not support NaN")
@@ -300,6 +303,11 @@ def test_rank_tie_methods(
             elif method == "first":
                 exp[np.isnan(ser)] = [9, 10]
 
+        if dtype == "string[pyarrow]" and not using_nan_is_na:
+            mask = np.isnan(exp)
+            exp = exp.astype(object)
+            exp[mask] = NA
+
         expected = Series(exp, dtype=expected_dtype(dtype, method))
         tm.assert_series_equal(result, expected)
 
@@ -320,7 +328,15 @@ def test_rank_tie_methods(
         ],
     )
     def test_rank_tie_methods_on_infs_nans(
-        self, rank_method, na_option, ascending, dtype, na_value, pos_inf, neg_inf
+        self,
+        rank_method,
+        na_option,
+        ascending,
+        dtype,
+        na_value,
+        pos_inf,
+        neg_inf,
+        using_nan_is_na,
     ):
         pytest.importorskip("scipy")
         if dtype == "float64[pyarrow]":
@@ -352,7 +368,7 @@ def test_rank_tie_methods_on_infs_nans(
             order = [ranks[1], ranks[0], ranks[2]]
         elif na_option == "bottom":
             order = [ranks[0], ranks[2], ranks[1]]
-        elif dtype == "float64[pyarrow]":
+        elif dtype == "float64[pyarrow]" and not using_nan_is_na:
             order = [ranks[0], [NA] * chunk, ranks[1]]
         else:
             order = [ranks[0], [np.nan] * chunk, ranks[1]]
@@ -424,7 +440,7 @@ def test_rank_descending(
         method, _ = results
         if (
             dtype == "int64"
-            or (dtype in ["int64[pyarrow]"] and not using_nan_is_na)
+            or (dtype in ["int64[pyarrow]", "Int64", "Float64"] and not using_nan_is_na)
             or (not using_infer_string and dtype == "str")
         ):
             s = ser.dropna().astype(dtype)
@@ -436,6 +452,8 @@ def test_rank_descending(
             expected = (s.astype("float64").max() - s.astype("float64")).rank()
         else:
             expected = (s.max() - s).rank()
+        if dtype == "string[pyarrow]" and not using_nan_is_na:
+            expected = expected.replace(np.nan, NA)
         tm.assert_series_equal(res, expected.astype(expected_dtype(dtype, "average")))
 
         if dtype.startswith("str"):
@@ -445,6 +463,8 @@ def test_rank_descending(
         else:
             expected = (s.max() - s).rank(method=method)
         res2 = s.rank(method=method, ascending=False)
+        if dtype == "string[pyarrow]" and not using_nan_is_na:
+            expected = expected.replace(np.nan, NA)
         tm.assert_series_equal(res2, expected.astype(expected_dtype(dtype, method)))
 
     def test_rank_int(self, ser, results):

From 95f9ad958504e9deff2b1d4114a7027170074182 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 09:37:39 -0700
Subject: [PATCH 32/39] update style test

---
 pandas/tests/io/formats/style/test_highlight.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/formats/style/test_highlight.py b/pandas/tests/io/formats/style/test_highlight.py
index 5d19e9c14d534..98c1f70f08e89 100644
--- a/pandas/tests/io/formats/style/test_highlight.py
+++ b/pandas/tests/io/formats/style/test_highlight.py
@@ -15,8 +15,10 @@
 @pytest.fixture(params=[(None, "float64"), (NA, "Int64")])
 def df(request):
     # GH 45804
+    dtype = request.param[1]
+    item = np.nan if dtype == "float64" else NA
     return DataFrame(
-        {"A": [0, np.nan, 10], "B": [1, request.param[0], 2]}, dtype=request.param[1]
+        {"A": [0, item, 10], "B": [1, request.param[0], 2]}, dtype=request.param[1]
     )
 
 

From 1e3d105240d4d28640e8b82f4dd2837eba01c134 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 09:43:18 -0700
Subject: [PATCH 33/39] update asvs, mypy ignores

---
 asv_bench/benchmarks/algorithms.py    | 4 ++--
 asv_bench/benchmarks/frame_methods.py | 3 +++
 asv_bench/benchmarks/groupby.py       | 4 ++++
 pandas/core/algorithms.py             | 7 ++++---
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py
index 933e8fbc175d8..422ba5201bc4e 100644
--- a/asv_bench/benchmarks/algorithms.py
+++ b/asv_bench/benchmarks/algorithms.py
@@ -199,8 +199,8 @@ class SortIntegerArray:
     params = [10**3, 10**5]
 
     def setup(self, N):
-        data = np.arange(N, dtype=float)
-        data[40] = np.nan
+        data = np.arange(N, dtype=float).astype(object)
+        data[40] = pd.NA
         self.array = pd.array(data, dtype="Int64")
 
     def time_argsort(self, N):
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
index cd7851acae3f2..14fa64c01f1a5 100644
--- a/asv_bench/benchmarks/frame_methods.py
+++ b/asv_bench/benchmarks/frame_methods.py
@@ -4,6 +4,7 @@
 import numpy as np
 
 from pandas import (
+    NA,
     DataFrame,
     Index,
     MultiIndex,
@@ -445,6 +446,8 @@ def setup(self, inplace, dtype):
             values[::2] = np.nan
             if dtype == "Int64":
                 values = values.round()
+                values = values.astype(object)
+                values[::2] = NA
             self.df = DataFrame(values, dtype=dtype)
         self.fill_values = self.df.iloc[self.df.first_valid_index()].to_dict()
 
diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py
index 19c556dfe9d1f..7c1d6457eea15 100644
--- a/asv_bench/benchmarks/groupby.py
+++ b/asv_bench/benchmarks/groupby.py
@@ -689,6 +689,10 @@ def setup(self, dtype, method, with_nans):
             null_vals = vals.astype(float, copy=True)
             null_vals[::2, :] = np.nan
             null_vals[::3, :] = np.nan
+            if dtype in ["Int64", "Float64"]:
+                null_vals = null_vals.astype(object)
+                null_vals[::2, :] = NA
+                null_vals[::3, :] = NA
             df = DataFrame(null_vals, columns=list("abcde"), dtype=dtype)
             df["key"] = keys
             self.df = df
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index c14ab2bc02da2..aa950022c5d1e 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1067,12 +1067,13 @@ def rank(
     is_datetimelike = needs_i8_conversion(values.dtype)
     if (
         isinstance(values.dtype, BaseMaskedDtype)
-        and values._hasna
+        and values._hasna  # type: ignore[union-attr]
         and values.dtype.kind in "iuf"
     ):
         # e.g. test_rank_ea_small_values
-        # TODO: bug in the object-dtype path that we would get without this special casting.
-        values = values.to_numpy(dtype=np.float64, na_value=np.nan)
+        # TODO: bug in the object-dtype path that we would get without
+        #  this special casting.
+        values = values.to_numpy(dtype=np.float64, na_value=np.nan)  # type: ignore[union-attr]
     else:
         values = _ensure_data(values)
 

From cefeb6b2ab85175ca729cd9d78c2e7a280418e5f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 09:48:50 -0700
Subject: [PATCH 34/39] pre-commit fixup

---
 pandas/core/config_init.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 1478380d90a7d..65f3b3f179e4c 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -429,7 +429,7 @@ def is_terminal() -> bool:
 
     cf.register_option(
         "nan_is_na",
-        os.environ.get("PANDAS_NAN_IS_NA", 0) == "1",
+        os.environ.get("PANDAS_NAN_IS_NA", "0") == "1",
         "Whether to treat NaN entries as interchangeable with pd.NA in "
         "numpy-nullable and pyarrow float dtypes. See discussion in "
         "https://github.com/pandas-dev/pandas/issues/32265",

From bc9c8891b2374c3db362a0332bba3c440a414c81 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 10:11:08 -0700
Subject: [PATCH 35/39] doc fixup

---
 doc/source/whatsnew/v0.24.0.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 0f40f5bfa5fc9..27d5a65a08467 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -50,7 +50,7 @@ marker of ``np.nan`` will infer to integer dtype. The display of the ``Series``
 
 .. ipython:: python
 
-   s = pd.Series([1, 2, np.nan], dtype='Int64')
+   s = pd.Series([1, 2, pd.NA], dtype='Int64')
    s
 
 
@@ -166,7 +166,7 @@ See the :ref:`dtypes docs <basics.dtypes>` for more on extension arrays.
 
 .. ipython:: python
 
-   pd.array([1, 2, np.nan], dtype='Int64')
+   pd.array([1, 2, pd.NA], dtype='Int64')
    pd.array(['a', 'b', 'c'], dtype='category')
 
 Passing data for which there isn't dedicated extension type (e.g. float, integer, etc.)

From 74ab221d3bf6f1946eb7ee83b80dee952ee1b98f Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 13:41:09 -0700
Subject: [PATCH 36/39] Remove special-casing

---
 pandas/core/algorithms.py                | 12 +-----------
 pandas/tests/series/methods/test_rank.py | 22 ++++++++--------------
 2 files changed, 9 insertions(+), 25 deletions(-)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index aa950022c5d1e..533b9b689af0b 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -1065,17 +1065,7 @@ def rank(
         (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
     """
     is_datetimelike = needs_i8_conversion(values.dtype)
-    if (
-        isinstance(values.dtype, BaseMaskedDtype)
-        and values._hasna  # type: ignore[union-attr]
-        and values.dtype.kind in "iuf"
-    ):
-        # e.g. test_rank_ea_small_values
-        # TODO: bug in the object-dtype path that we would get without
-        #  this special casting.
-        values = values.to_numpy(dtype=np.float64, na_value=np.nan)  # type: ignore[union-attr]
-    else:
-        values = _ensure_data(values)
+    values = _ensure_data(values)
 
     if values.ndim == 1:
         ranks = algos.rank_1d(
diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index 55ee660d09067..ee221c0b72e65 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -288,20 +288,14 @@ def test_rank_tie_methods(
             pytest.skip("int64/str does not support NaN")
 
         ser = ser if dtype is None else ser.astype(dtype)
+        if dtype in ["float64[pyarrow]", "Float64"] and not using_nan_is_na:
+            # TODO: use ser.replace(np.nan, NA) once that works
+            ser[np.isnan(ser.to_numpy(dtype=np.float64, na_value=np.nan))] = NA
+            mask = np.isnan(exp)
+            exp = exp.astype(object)
+            exp[mask] = NA
+
         result = ser.rank(method=method)
-        if dtype == "float64[pyarrow]" and not using_nan_is_na:
-            # the NaNs are not treated as NA
-            exp = exp.copy()
-            if method == "average":
-                exp[np.isnan(ser)] = 9.5
-            elif method == "dense":
-                exp[np.isnan(ser)] = 6
-            elif method == "max":
-                exp[np.isnan(ser)] = 10
-            elif method == "min":
-                exp[np.isnan(ser)] = 9
-            elif method == "first":
-                exp[np.isnan(ser)] = [9, 10]
 
         if dtype == "string[pyarrow]" and not using_nan_is_na:
             mask = np.isnan(exp)
@@ -368,7 +362,7 @@ def test_rank_tie_methods_on_infs_nans(
             order = [ranks[1], ranks[0], ranks[2]]
         elif na_option == "bottom":
             order = [ranks[0], ranks[2], ranks[1]]
-        elif dtype == "float64[pyarrow]" and not using_nan_is_na:
+        elif dtype in ("float64[pyarrow]", "Float64") and not using_nan_is_na:
             order = [ranks[0], [NA] * chunk, ranks[1]]
         else:
             order = [ranks[0], [np.nan] * chunk, ranks[1]]

From 0773a86df7c14690ab63c3c720e6fc653db2ca0d Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 13:58:37 -0700
Subject: [PATCH 37/39] Add TODO comment to test_rank_tie_methods skip condition

---
 pandas/tests/series/methods/test_rank.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/series/methods/test_rank.py b/pandas/tests/series/methods/test_rank.py
index ee221c0b72e65..357894cbd0fe3 100644
--- a/pandas/tests/series/methods/test_rank.py
+++ b/pandas/tests/series/methods/test_rank.py
@@ -280,6 +280,7 @@ def test_rank_tie_methods(
         if (
             dtype == "int64"
             or (
+                # TODO: these can work but need to update ser construction.
                 dtype in ["int64[pyarrow]", "uint64[pyarrow]", "Int64"]
                 and not using_nan_is_na
             )

From 35a3c144b2671ca978e31c491463d8386ce010d6 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Mon, 4 Aug 2025 19:13:34 -0700
Subject: [PATCH 38/39] ruff format

---
 pandas/tests/indexing/test_iloc.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
index 5414389f52fc5..bbff484341ad5 100644
--- a/pandas/tests/indexing/test_iloc.py
+++ b/pandas/tests/indexing/test_iloc.py
@@ -795,9 +795,9 @@ def test_iloc_mask(self):
                     idx is None or (idx == "index" and method != ".iloc")
                 ) and "0b" in expected_result:
                     # For successful numeric results, exact match is needed
-                    assert (
-                        expected_result == answer
-                    ), f"[{key}] does not match [{answer}]"
+                    assert expected_result == answer, (
+                        f"[{key}] does not match [{answer}]"
+                    )
                 else:
                     # For error messages, substring match is sufficient
                     assert expected_result in answer, f"[{key}] not found in [{answer}]"

From 71d1c03766fbb17e7d7a1325be7170f08bac03e0 Mon Sep 17 00:00:00 2001
From: Brock <jbrockmendel@gmail.com>
Date: Tue, 5 Aug 2025 18:10:41 -0700
Subject: [PATCH 39/39] Set default to True

---
 pandas/core/config_init.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 65f3b3f179e4c..9b317b51cabdc 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -429,7 +429,7 @@ def is_terminal() -> bool:
 
     cf.register_option(
         "nan_is_na",
-        os.environ.get("PANDAS_NAN_IS_NA", "0") == "1",
+        os.environ.get("PANDAS_NAN_IS_NA", "1") == "1",
         "Whether to treat NaN entries as interchangeable with pd.NA in "
         "numpy-nullable and pyarrow float dtypes. See discussion in "
         "https://github.com/pandas-dev/pandas/issues/32265",