From b43303f3c58cf20a8d2895fe73b9f707827d0ddd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 11 Jan 2023 14:17:39 -0800 Subject: [PATCH 1/9] Fix asarray_tuplesafe for numpy 1.24.1 deprecation --- pandas/compat/numpy/__init__.py | 1 + pandas/core/common.py | 7 +++- pandas/tests/reshape/test_pivot.py | 66 ++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 60ec74553a207..d09a88e94b1eb 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -8,6 +8,7 @@ _nlv = Version(_np_version) np_version_under1p21 = _nlv < Version("1.21") np_version_under1p22 = _nlv < Version("1.22") +np_version_under1p24 = _nlv < Version("1.24") np_version_gte1p22 = _nlv >= Version("1.22") is_numpy_dev = _nlv.dev is not None _min_numpy_ver = "1.20.3" diff --git a/pandas/core/common.py b/pandas/core/common.py index 8764ee0ea6ed7..98640e65a8135 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -36,6 +36,7 @@ RandomState, T, ) +from pandas.compat.numpy import np_version_under1p24 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( @@ -43,6 +44,7 @@ is_bool_dtype, is_extension_array_dtype, is_integer, + is_list_like, ) from pandas.core.dtypes.generic import ( ABCExtensionArray, @@ -232,7 +234,10 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi elif isinstance(values, ABCIndex): return values._values - if isinstance(values, list) and dtype in [np.object_, object]: + if isinstance(values, list) and ( + dtype in [np.object_, object] + or (not np_version_under1p24 and any(is_list_like(val) for val in values)) + ): return construct_1d_object_array_from_listlike(values) result = np.asarray(values, dtype=dtype) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py 
index 9a72a8dadf8d0..770d7397f1c06 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2297,6 +2297,72 @@ def test_pivot_table_datetime_warning(self): ) tm.assert_frame_equal(result, expected) + def test_pivot_table_with_mixed_nested_tuples(self): + # GH 50342 + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + ("col5",): [ + "foo", + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + ], + ("col6", 6): [ + "one", + "one", + "one", + "two", + "two", + "one", + "one", + "two", + "two", + ], + (7, "seven"): [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + } + ) + result = pivot_table( + df, values="D", index=["A", "B"], columns=[(7, "seven")], aggfunc=np.sum + ) + expected = DataFrame( + [[4.0, 5.0], [7.0, 6.0], [4.0, 1.0], [np.nan, 6.0]], + columns=Index(["large", "small"], name=(7, "seven")), + index=MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo"], ["one", "two"] * 2], names=["A", "B"] + ), + ) + tm.assert_frame_equal(result, expected) + class TestPivot: def test_pivot(self): From 750ac7d9dfe1b852bf8577d8b1de9b05e969c1aa Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 11 Jan 2023 14:38:39 -0800 Subject: [PATCH 2/9] BUG: pivot_table with nested elements and numpy 1.24 --- doc/source/whatsnew/v1.5.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index 0cb8796e3fb5d..39a8f7ffb9d82 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -30,7 +30,7 @@ Bug fixes - Bug in 
:meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`) - Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) - Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`) -- +- Bug in :func:`pivot_table` with NumPy 1.24 or greater when the :class:`DataFrame` columns have nested elements (:issue:`50342`) .. --------------------------------------------------------------------------- .. _whatsnew_153.other: From 5c4669583ead98c657b109cfb9cbd5a3af8fcca8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 11 Jan 2023 17:14:05 -0800 Subject: [PATCH 3/9] For all numpy versions --- pandas/core/common.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 98640e65a8135..e0bed9e960a7f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -36,7 +36,6 @@ RandomState, T, ) -from pandas.compat.numpy import np_version_under1p24 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike from pandas.core.dtypes.common import ( @@ -235,8 +234,7 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi return values._values if isinstance(values, list) and ( - dtype in [np.object_, object] - or (not np_version_under1p24 and any(is_list_like(val) for val in values)) + dtype in [np.object_, object] or any(is_list_like(val) for val in values) ): return construct_1d_object_array_from_listlike(values) From 4b88e87551ea97fcb8e50a512e0f8966d783d7da Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 11 Jan 2023 17:14:55 -0800 Subject: [PATCH 4/9] Undo unneeded variable --- pandas/compat/numpy/__init__.py | 1 - 1 file changed, 1
deletion(-) diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index d09a88e94b1eb..60ec74553a207 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -8,7 +8,6 @@ _nlv = Version(_np_version) np_version_under1p21 = _nlv < Version("1.21") np_version_under1p22 = _nlv < Version("1.22") -np_version_under1p24 = _nlv < Version("1.24") np_version_gte1p22 = _nlv >= Version("1.22") is_numpy_dev = _nlv.dev is not None _min_numpy_ver = "1.20.3" From c56e38a7d071a7d18a149e922f43583571357d94 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 12 Jan 2023 16:31:00 -0800 Subject: [PATCH 5/9] fix for arraymanager --- pandas/tests/reshape/test_pivot.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 770d7397f1c06..ee748ba762f26 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2297,7 +2297,7 @@ def test_pivot_table_datetime_warning(self): ) tm.assert_frame_equal(result, expected) - def test_pivot_table_with_mixed_nested_tuples(self): + def test_pivot_table_with_mixed_nested_tuples(self, using_array_manager): # GH 50342 df = DataFrame( { @@ -2361,6 +2361,9 @@ def test_pivot_table_with_mixed_nested_tuples(self): [["bar", "bar", "foo", "foo"], ["one", "two"] * 2], names=["A", "B"] ), ) + if using_array_manager: + # INFO(ArrayManager) column without NaNs can preserve int dtype + expected["small"] = expected["small"].astype("int64") tm.assert_frame_equal(result, expected) From 419f857d28d581c37f109c95b770c884111b4370 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sun, 15 Jan 2023 14:26:56 -0800 Subject: [PATCH 6/9] use try except --- pandas/core/common.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py 
index e0bed9e960a7f..29386dfb4eeda 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -25,6 +25,7 @@ cast, overload, ) +import warnings import numpy as np @@ -43,7 +44,6 @@ is_bool_dtype, is_extension_array_dtype, is_integer, - is_list_like, ) from pandas.core.dtypes.generic import ( ABCExtensionArray, @@ -233,12 +233,17 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi elif isinstance(values, ABCIndex): return values._values - if isinstance(values, list) and ( - dtype in [np.object_, object] or any(is_list_like(val) for val in values) - ): + if isinstance(values, list) and dtype in [np.object_, object]: return construct_1d_object_array_from_listlike(values) - result = np.asarray(values, dtype=dtype) + try: + with warnings.catch_warnings(): + # Can remove warning filter once NumPy 1.24 is min version + warnings.simplefilter("ignore", np.VisibleDeprecationWarning) + result = np.asarray(values, dtype=dtype) + except ValueError: + # More performant than checking is_list_like over each element + return construct_1d_object_array_from_listlike(values) if issubclass(result.dtype.type, str): result = np.asarray(values, dtype=object) From b8f453b1e7904926cd72e971684d0d5554352996 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 16 Jan 2023 10:38:56 -0800 Subject: [PATCH 7/9] typing --- pandas/core/common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 29386dfb4eeda..9793522400904 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -243,7 +243,9 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi result = np.asarray(values, dtype=dtype) except ValueError: # More performant than checking is_list_like over each element - return construct_1d_object_array_from_listlike(values) + # error: Argument 1 to "construct_1d_object_array_from_listlike" + # has 
incompatible type "Iterable[Any]"; expected "Sized" + return construct_1d_object_array_from_listlike(values) # type: ignore[arg-type] if issubclass(result.dtype.type, str): result = np.asarray(values, dtype=object) From b68e7279a1c047f59a2ac39d0b44030389f5435e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 17 Jan 2023 09:27:09 -0800 Subject: [PATCH 8/9] Update pandas/core/common.py Co-authored-by: Marc Garcia --- pandas/core/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 9793522400904..161cad1ce9b2b 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -242,7 +242,7 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi warnings.simplefilter("ignore", np.VisibleDeprecationWarning) result = np.asarray(values, dtype=dtype) except ValueError: - # More performant than checking is_list_like over each element + # Using try/except since it's more performant than checking is_list_like over each element # error: Argument 1 to "construct_1d_object_array_from_listlike" # has incompatible type "Iterable[Any]"; expected "Sized" return construct_1d_object_array_from_listlike(values) # type: ignore[arg-type] From cb084a6ee498669ea92ccf2500c51e654c79c78a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 17 Jan 2023 09:31:38 -0800 Subject: [PATCH 9/9] line length --- pandas/core/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 161cad1ce9b2b..aaa5134ed1aaa 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -242,7 +242,8 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi warnings.simplefilter("ignore", np.VisibleDeprecationWarning) result = np.asarray(values, dtype=dtype) except ValueError: - # Using try/except since it's more 
performant than checking is_list_like over each element + # Using try/except since it's more performant than checking is_list_like + # over each element # error: Argument 1 to "construct_1d_object_array_from_listlike" # has incompatible type "Iterable[Any]"; expected "Sized" return construct_1d_object_array_from_listlike(values) # type: ignore[arg-type]