Skip to content

Commit

Permalink
Backport PR #58202: DOC/TST: Document numpy 2.0 support and add tests… (
Browse files Browse the repository at this point in the history
#58208)

Backport PR #58202: DOC/TST: Document numpy 2.0 support and add tests for string array
  • Loading branch information
lithomas1 committed Apr 10, 2024
1 parent 45b0b32 commit 5466f15
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 0 deletions.
15 changes: 15 additions & 0 deletions doc/source/whatsnew/v2.2.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,21 @@ including other versions of pandas.
{{ header }}

.. ---------------------------------------------------------------------------
.. _whatsnew_220.np2_compat:

Pandas 2.2.2 is now compatible with numpy 2.0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Pandas 2.2.2 is the first version of pandas that is generally compatible with the upcoming
numpy 2.0 release, and wheels for pandas 2.2.2 will work with both numpy 1.x and 2.x.

One major caveat is that arrays created with numpy 2.0's new ``StringDType`` will convert
to ``object`` dtyped arrays upon :class:`Series`/:class:`DataFrame` creation.
Full support for numpy 2.0's ``StringDType`` is expected to land in pandas 3.0.

As usual, please report any bugs discovered to our `issue tracker <https://github.com/pandas-dev/pandas/issues/new/choose>`_.

.. _whatsnew_222.regressions:

Fixed regressions
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from pandas._config import using_pyarrow_string_dtype

from pandas._libs import lib
from pandas.compat.numpy import np_version_gt2
from pandas.errors import IntCastingNaNError
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -3118,6 +3119,24 @@ def test_columns_indexes_raise_on_sets(self):
with pytest.raises(ValueError, match="columns cannot be a set"):
DataFrame(data, columns={"a", "b", "c"})

# TODO: make this not cast to object in pandas 3.0
@pytest.mark.skipif(
    not np_version_gt2, reason="StringDType only available in numpy 2 and above"
)
@pytest.mark.parametrize(
    "data",
    [
        {"a": ["a", "b", "c"], "b": [1.0, 2.0, 3.0], "c": ["d", "e", "f"]},
    ],
)
def test_np_string_array_object_cast(self, data):
    """Check that a numpy ``StringDType`` column becomes ``object`` in a DataFrame.

    Column ``"a"`` of the parametrized dict is replaced with a numpy array
    built with ``StringDType``; the resulting DataFrame column must have
    ``object`` dtype and compare equal, element-wise, to that array.
    """
    # Imported locally: the test is skipped unless numpy is version 2 or above.
    from numpy.dtypes import StringDType

    string_column = np.array(data["a"], dtype=StringDType())
    data["a"] = string_column

    frame = DataFrame(data)
    column = frame["a"]

    assert column.dtype == np.object_
    assert (column == string_column).all()


def get1(obj): # TODO: make a helper in tm?
if isinstance(obj, Series):
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2191,6 +2191,25 @@ def test_series_constructor_infer_multiindex(self, container, data):
multi = Series(data, index=indexes)
assert isinstance(multi.index, MultiIndex)

# TODO: make this not cast to object in pandas 3.0
@pytest.mark.skipif(
    not np_version_gt2, reason="StringDType only available in numpy 2 and above"
)
@pytest.mark.parametrize(
    "data",
    [
        ["a", "b", "c"],
        ["a", "b", np.nan],
    ],
)
def test_np_string_array_object_cast(self, data):
    """Check that a numpy ``StringDType`` array becomes an ``object`` Series.

    The parametrized list is converted to a numpy array with ``StringDType``
    and passed to the Series constructor; the result must have ``object``
    dtype and compare equal, element-wise, to the original list.
    """
    # Imported locally: the test is skipped unless numpy is version 2 or above.
    from numpy.dtypes import StringDType

    string_values = np.array(data, dtype=StringDType())
    ser = Series(string_values)

    assert ser.dtype == np.object_
    matches = ser == data
    assert matches.all()


class TestSeriesConstructorInternals:
def test_constructor_no_pandas_array(self, using_array_manager):
Expand Down

0 comments on commit 5466f15

Please sign in to comment.