From f8153f37e5a75700a982c2dcde0e6decce2b72b0 Mon Sep 17 00:00:00 2001 From: Pedro Diogo Date: Fri, 28 Mar 2025 16:21:32 +0000 Subject: [PATCH 1/3] Fix #60766: .map and .apply would convert the element type for extension arrays. The Int32Dtype type allows representing integers with support for null values (pd.NA). However, when using .map(f) or .apply(f), the elements passed to f are converted to float64, and pd.NA is transformed into np.nan. This happens because BaseMaskedArray.map() calls self.to_numpy() before handing the data to map_array, and that NumPy conversion turns the data into float64, even when the original type is Int32Dtype. The fix (passing the masked array itself to map_array instead of self.to_numpy()) ensures that when using .map() or .apply(), the elements in the series retain their original type (Int32, Float64, boolean, etc.), preventing unnecessary conversions to float64 and ensuring that pd.NA remains correctly handled. --- pandas/core/arrays/masked.py | 2 +- .../arrays/masked/test_basemaskedarray_map.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/arrays/masked/test_basemaskedarray_map.py diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 708a3818bcbb7..b2d9541550745 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1325,7 +1325,7 @@ def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs): return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis) def map(self, mapper, na_action: Literal["ignore"] | None = None): - return map_array(self.to_numpy(), mapper, na_action=na_action) + return map_array(self, mapper, na_action=na_action) @overload def any( diff --git a/pandas/tests/arrays/masked/test_basemaskedarray_map.py b/pandas/tests/arrays/masked/test_basemaskedarray_map.py new file mode 100644 index 0000000000000..8a2cdbecdfe43 --- /dev/null +++ b/pandas/tests/arrays/masked/test_basemaskedarray_map.py @@ -0,0 +1,18 @@ +import pandas as pd + +def test_basemaskedarray_map(): + for 
dtype, data, expected_data in [ + ("Int32", [1, 2, None, 4], [2, 3, pd.NA, 5]), + + ]: + s = pd.Series(data, dtype=dtype) + + def transform(x): + if x is None: + return x + return x + 1 + + result = s.map(transform) + expected = pd.Series(expected_data, dtype=result.dtype) + + assert result.tolist() == expected.tolist() From bf6aaef64249418f4f087527f04abd917a16fa03 Mon Sep 17 00:00:00 2001 From: Pedro Diogo Date: Sat, 3 May 2025 22:46:54 +0100 Subject: [PATCH 2/3] Update v3.0.0.rst --- doc/source/whatsnew/v3.0.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2b437734a451a..04d2bbdf9f24a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -815,6 +815,8 @@ ExtensionArray - Bug in comparison between object with :class:`ArrowDtype` and incompatible-dtyped (e.g. string vs bool) incorrectly raising instead of returning all-``False`` (for ``==``) or all-``True`` (for ``!=``) (:issue:`59505`) - Bug in constructing pandas data structures when passing into ``dtype`` a string of the type followed by ``[pyarrow]`` while PyArrow is not installed would raise ``NameError`` rather than ``ImportError`` (:issue:`57928`) - Bug in various :class:`DataFrame` reductions for pyarrow temporal dtypes returning incorrect dtype when result was null (:issue:`59234`) +- Bug in :meth:`Series.map` and :meth:`Series.apply` where applying a function to a Series with :class:`Int32Dtype` or another masked (nullable) extension dtype would convert the elements to float and ``pd.NA`` to ``np.nan`` instead of preserving the original types (:issue:`60766`) + Styler ^^^^^^ From e8edcea911518212ba4514b9dc255ce35c16ae0f Mon Sep 17 00:00:00 2001 From: Pedro Diogo Date: Wed, 7 May 2025 20:02:08 +0100 Subject: [PATCH 3/3] fixed test_masked.py --- pandas/tests/extension/test_masked.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/extension/test_masked.py 
b/pandas/tests/extension/test_masked.py index 3b9079d06e231..05a6265d6f01e 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -171,6 +171,12 @@ class TestMaskedArrays(base.ExtensionTests): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): result = data_missing.map(lambda x: x, na_action=na_action) + if data_missing.dtype.kind != "b": + for i in range(len(result)): + if result[i] is pd.NA: + result[i] = "nan" + result = result.astype("float64") + if data_missing.dtype == Float32Dtype(): # map roundtrips through objects, which converts to float64 expected = data_missing.to_numpy(dtype="float64", na_value=np.nan) @@ -181,10 +187,15 @@ def test_map(self, data_missing, na_action): def test_map_na_action_ignore(self, data_missing_for_sorting): zero = data_missing_for_sorting[2] result = data_missing_for_sorting.map(lambda x: zero, na_action="ignore") + if data_missing_for_sorting.dtype.kind == "b": expected = np.array([False, pd.NA, False], dtype=object) else: expected = np.array([zero, np.nan, zero]) + for i in range(len(result)): + if result[i] is pd.NA: + result[i] = "nan" + result = result.astype("float64") tm.assert_numpy_array_equal(result, expected) def _get_expected_exception(self, op_name, obj, other):