Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ Sparse

ExtensionArray
^^^^^^^^^^^^^^
- Bug in NumPy ufuncs such as :func:`numpy.isnan` raising ``TypeError`` on a :class:`Series` or :class:`Index` backed by a PyArrow dtype when ``future.distinguish_nan_and_na`` is ``True`` (:issue:`62506`)
- Bug in :meth:`Series.apply` and :meth:`Series.map` where nullable integer dtypes were converted to float, causing precision loss for large integers; the nullable dtype is now preserved (:issue:`63903`)
-
-
Expand Down
61 changes: 61 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,13 +846,74 @@ def __arrow_array__(self, type=None):
return self._pa_array

def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
    """
    Apply a NumPy ufunc to this array.

    Two paths exist:

    * When ``is_nan_na()`` is true, or ``self`` is a subclass rather than
      exactly ``ArrowExtensionArray``, the parent implementation runs. For
      exactly ``ArrowExtensionArray`` its result is re-wrapped with
      ``_from_sequence`` (GH#62800); subclasses get the parent result as-is.
    * Otherwise (GH#62506) the parent's dispatch sequence is replicated here,
      but the final fallback converts ``self`` with ``na_value=np.nan`` so
      the ufunc sees a float array rather than object dtype, and the NA
      positions are masked again in the wrapped result.
    """
    nan_is_na = is_nan_na()
    is_plain_arrow = type(self) is ArrowExtensionArray

    if nan_is_na or not is_plain_arrow:
        # Need to wrap np.array results GH#62800
        parent_result = super().__array_ufunc__(ufunc, method, *inputs, **kwargs)
        # Subclasses (e.g. ArrowStringArray) are excluded from the re-wrap.
        return (
            type(self)._from_sequence(parent_result)
            if is_plain_arrow
            else parent_result
        )

    # GH#62506 - when distinguish_nan_and_na is True, default_array_ufunc
    # converts to numpy via np.asarray which produces object dtype that most
    # ufuncs can't handle.
    from pandas.core.dtypes.generic import (
        ABCDataFrame,
        ABCIndex,
        ABCSeries,
    )

    from pandas.core import arraylike

    # Defer whenever a Series/Index/DataFrame takes part in the call.
    for operand in inputs:
        if isinstance(operand, (ABCSeries, ABCIndex, ABCDataFrame)):
            return NotImplemented

    # First preference: a matching dunder operation.
    dunder_result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
        self, ufunc, method, *inputs, **kwargs
    )
    if dunder_result is not NotImplemented:
        return dunder_result

    # An explicit ``out=`` gets its own dispatch helper.
    if "out" in kwargs:
        return arraylike.dispatch_ufunc_with_out(
            self, ufunc, method, *inputs, **kwargs
        )

    if method == "reduce":
        reduced = arraylike.dispatch_reduction_ufunc(
            self, ufunc, method, *inputs, **kwargs
        )
        if reduced is not NotImplemented:
            return reduced

    # Default: hand the ufunc a numpy view of ``self`` in which NA is NaN,
    # then restore NA at the original missing positions.
    na_positions = self.isna()
    numpy_inputs = []
    for operand in inputs:
        if operand is self:
            numpy_inputs.append(self.to_numpy(na_value=np.nan))
        else:
            numpy_inputs.append(operand)

    raw = getattr(ufunc, method)(*numpy_inputs, **kwargs)

    rewrap = self._wrap_and_remask_ufunc_result
    if not isinstance(raw, tuple):
        return rewrap(raw, na_positions)
    # Multi-output ufuncs: wrap each output separately.
    return tuple(rewrap(part, na_positions) for part in raw)

def _wrap_and_remask_ufunc_result(
    self, result: np.ndarray, mask: npt.NDArray[np.bool_]
) -> ArrowExtensionArray:
    """
    Wrap a raw ufunc result in this array type and restore missing values.

    Parameters
    ----------
    result : np.ndarray
        Output of the ufunc, passed to ``type(self)._from_sequence``.
    mask : np.ndarray of bool
        Positions that must hold the dtype's ``na_value`` in the output.

    Returns
    -------
    ArrowExtensionArray
        The wrapped result, with ``mask`` positions set to NA.
    """
    wrapped = type(self)._from_sequence(result)
    if not mask.any():
        # Nothing was missing, so there is nothing to restore.
        return wrapped
    wrapped[mask] = wrapped.dtype.na_value
    return wrapped

def __array__(
self, dtype: NpDtype | None = None, copy: bool | None = None
) -> np.ndarray:
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -3845,6 +3845,25 @@ def test_setitem_float_nan_is_na(using_nan_is_na):
assert np.isnan(ser[2])


def test_np_ufunc_pyarrow_distinguish_nan_na():
    # GH#62506 - with distinguish_nan_and_na=True, ufuncs on pyarrow-backed
    # Series should return a result instead of raising TypeError from the
    # object dtype conversion.
    with pd.option_context("future.distinguish_nan_and_na", True):
        nan_and_na = pd.Series([1.0, float("nan"), None], dtype="double[pyarrow]")

        # NaN is reported as NaN; the missing entry stays NA.
        is_nan = np.isnan(nan_and_na)
        tm.assert_series_equal(
            is_nan, pd.Series([False, True, pd.NA], dtype="bool[pyarrow]")
        )

        # NaN is not finite; the missing entry stays NA.
        is_finite = np.isfinite(nan_and_na)
        tm.assert_series_equal(
            is_finite, pd.Series([True, False, pd.NA], dtype="bool[pyarrow]")
        )

        # A float-returning ufunc keeps the pyarrow float dtype and the NA.
        roots = np.sqrt(pd.Series([1.0, 4.0, None], dtype="double[pyarrow]"))
        tm.assert_series_equal(
            roots, pd.Series([1.0, 2.0, pd.NA], dtype="double[pyarrow]")
        )


def test_pow_with_all_na_float():
# GH#62520

Expand Down
14 changes: 4 additions & 10 deletions pandas/tests/series/test_npfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,8 @@ def test_numpy_argwhere(index):

@td.skip_if_no("pyarrow")
def test_log_arrow_backed_missing_value(using_nan_is_na):
    # GH#56285, GH#62506
    # np.log on a pyarrow-backed float Series containing a missing value is
    # expected to succeed in both NaN/NA modes, so there is no branch on the
    # fixture value and no expectation of a TypeError.
    # NOTE(review): using_nan_is_na is not read in the body; it is kept so
    # the test signature is unchanged - confirm whether it is still needed.
    ser = Series([1, 2, None], dtype="float64[pyarrow]")
    result = np.log(ser)
    expected = np.log(Series([1, 2, None], dtype="float64[pyarrow]"))
    tm.assert_series_equal(result, expected)
Loading