From cfd116dddfb620a69c9232002b7cebfaf874039e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 22 Dec 2022 17:01:58 +0100 Subject: [PATCH 1/3] BUG/COMPAT: fix assert_* functions for nested arrays with latest numpy --- pandas/core/dtypes/missing.py | 2 + pandas/tests/dtypes/test_missing.py | 109 ++++++++++++++++-- pandas/tests/util/test_assert_almost_equal.py | 84 ++++++++++++++ 3 files changed, 188 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 000b5ebbdd2f7..8b621c0e381b5 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -584,6 +584,8 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo if "boolean value of NA is ambiguous" in str(err): return False raise + except ValueError: + return False return True diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 94707dc2e68c5..a8ba6dd836425 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -545,18 +545,113 @@ def test_array_equivalent_str(dtype): ) -def test_array_equivalent_nested(): +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested(strict_nan): # reached in groupby aggregations, make sure we use np.any when checking # if the comparison is truthy - left = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) - right = np.array([np.array([50, 70, 90]), np.array([20, 30, 40])], dtype=object) + left = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object) + right = np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object) - assert array_equivalent(left, right, strict_nan=True) - assert not array_equivalent(left, right[::-1], strict_nan=True) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) - left = np.array([np.array([50, 50, 50]), np.array([40, 40, 40])], dtype=object) + left = np.array([np.array([50, 50, 50]), np.array([40, 40])], dtype=object) right = np.array([50, 40]) - assert not array_equivalent(left, right, strict_nan=True) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested2(strict_nan): + # more than one level of nesting + left = np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ) + right = np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.array([np.array([np.array([50, 50, 50])], dtype=object)], dtype=object) + right = np.array([50]) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.parametrize( + "strict_nan", [pytest.param(True, marks=pytest.mark.xfail), False] +) +def test_array_equivalent_nested_list(strict_nan): + left = np.array([[50, 70, 90], [20, 30]], dtype=object) + right = np.array([[50, 70, 90], [20, 30]], dtype=object) + + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + left = np.array([[50, 50, 50], [40, 40]], dtype=object) + right = np.array([50, 40]) + assert not array_equivalent(left, right, strict_nan=strict_nan) + + +@pytest.mark.xfail(reason="failing") +@pytest.mark.parametrize("strict_nan", [True, False]) +def test_array_equivalent_nested_mixed_list(strict_nan): + # mixed arrays / lists in left and right + # https://github.com/pandas-dev/pandas/issues/50360 + left = np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object) + right = np.array([[1, 2, 3], [4, 5]], dtype=object) + + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + # multiple levels of nesting + left = np.array( + [ + np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object), + np.array([np.array([6]), np.array([7, 8]), np.array([9])], dtype=object), + ], + dtype=object, + ) + right = np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + # same-length lists + subarr = np.empty(2, dtype=object) + subarr[:] = [ + np.array([None, "b"], dtype=object), + np.array(["c", "d"], dtype=object), + ] + left = np.array([subarr, None], dtype=object) + right = np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + +@pytest.mark.xfail(reason="failing") +@pytest.mark.parametrize("strict_nan", [True, False]) +def test_array_equivalent_nested_dicts(strict_nan): + left = np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object) + right = np.array( + [{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object + ) + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + + right2 = np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object) + assert array_equivalent(left, right2, strict_nan=strict_nan) + assert not array_equivalent(left, right2[::-1], strict_nan=strict_nan) def test_array_equivalent_index_with_tuples(): diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py index ba52536e246d0..d5c334c87315d 100644 --- a/pandas/tests/util/test_assert_almost_equal.py +++ b/pandas/tests/util/test_assert_almost_equal.py @@ -448,3 +448,87 @@ def test_assert_almost_equal_iterable_values_mismatch(): with pytest.raises(AssertionError, match=msg): tm.assert_almost_equal([1, 2], [1, 3]) + + +subarr = np.empty(2, dtype=object) +subarr[:] = [np.array([None, "b"], dtype=object), np.array(["c", "d"], dtype=object)] + +NESTED_CASES = [ + # nested array + ( + np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object), + np.array([np.array([50, 70, 90]), np.array([20, 30])], dtype=object), + ), + # >1 level of nesting + ( + np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ), + np.array( + [ + np.array([np.array([50, 70]), np.array([90])], dtype=object), + np.array([np.array([20, 30])], dtype=object), + ], + dtype=object, + ), + ), + # lists + ( + np.array([[50, 70, 90], [20, 30]], dtype=object), + np.array([[50, 70, 90], [20, 30]], dtype=object), + ), + # mixed array/list + ( + np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object), + np.array([[1, 2, 3], [4, 5]], dtype=object), + ), + ( + np.array( + [ + np.array([np.array([1, 2, 3]), np.array([4, 5])], dtype=object), + np.array( + [np.array([6]), np.array([7, 8]), np.array([9])], dtype=object + ), + ], + dtype=object, + ), + np.array([[[1, 2, 3], [4, 5]], [[6], [7, 8], [9]]], dtype=object), + ), + # same-length lists + ( + np.array([subarr, None], dtype=object), + np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object), + ), + # dicts + ( + np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object), + np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object), + ), + ( + np.array([{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object), + np.array([{"f1": 1, "f2": ["a", "b"]}], dtype=object), + ), + # array/list of dicts + ( + np.array( + [ + np.array( + [{"f1": 1, "f2": np.array(["a", "b"], dtype=object)}], dtype=object + ), + np.array([], dtype=object), + ], + dtype=object, + ), + np.array([[{"f1": 1, "f2": ["a", "b"]}], []], dtype=object), + ), +] + + +@pytest.mark.filterwarnings("ignore:elementwise comparison failed:DeprecationWarning") +@pytest.mark.parametrize("a,b", NESTED_CASES) +def test_assert_almost_equal_array_nested(a, b): + _assert_almost_equal_both(a, b) From 07da6a73f4b55857e9f93e21effb430e6b3b3d75 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 22 Dec 2022 17:21:57 +0100 Subject: [PATCH 2/3] add whatsnew --- doc/source/whatsnew/v1.5.3.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index 0cb8796e3fb5d..e514267a4b6b8 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -30,6 +30,7 @@ Bug fixes - Bug in :meth:`.Styler.to_excel` leading to error when unrecognized ``border-style`` (e.g. ``"hair"``) provided to Excel writers (:issue:`48649`) - Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) - Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`) +- Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`) - .. --------------------------------------------------------------------------- From 84f1abc92c7fb2eee3326cfe52327e6ce130f02f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 22 Dec 2022 20:47:02 +0100 Subject: [PATCH 3/3] add comment + extra test --- pandas/core/dtypes/missing.py | 2 ++ pandas/tests/dtypes/test_missing.py | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 8b621c0e381b5..3ee5d8c2f2676 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -585,6 +585,8 @@ def _array_equivalent_object(left: np.ndarray, right: np.ndarray, strict_nan: bo return False raise except ValueError: + # numpy can raise a ValueError if left and right cannot be + # compared (e.g. nested arrays) return False return True diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index a8ba6dd836425..76786a53e4478 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -557,6 +557,13 @@ def test_array_equivalent_nested(strict_nan): assert array_equivalent(left, right, strict_nan=strict_nan) assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + left = np.empty(2, dtype=object) + left[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])] + right = np.empty(2, dtype=object) + right[:] = [np.array([50, 70, 90]), np.array([20, 30, 40])] + assert array_equivalent(left, right, strict_nan=strict_nan) + assert not array_equivalent(left, right[::-1], strict_nan=strict_nan) + left = np.array([np.array([50, 50, 50]), np.array([40, 40])], dtype=object) right = np.array([50, 40]) assert not array_equivalent(left, right, strict_nan=strict_nan)