Skip to content

Commit

Permalink
BUG: mode not sorting values for arrow backed strings (pandas-dev#55621)
Browse files Browse the repository at this point in the history
* BUG: mode not sorting values for arrow backed strings

* Fix tests

* Change to pa_installed variable

* Update pyarrow.py

* Fix

* Fix

(cherry picked from commit bb2d2e0)
  • Loading branch information
phofl committed Oct 25, 2023
1 parent f9a4bef commit ba40bda
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 9 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.2.rst
Expand Up @@ -31,6 +31,7 @@ Bug fixes
- Fixed bug in :meth:`Index.insert` raising when inserting ``None`` into :class:`Index` with ``dtype="string[pyarrow_numpy]"`` (:issue:`55365`)
- Fixed bug in :meth:`Series.all` and :meth:`Series.any` not treating missing values correctly for ``dtype="string[pyarrow_numpy]"`` (:issue:`55367`)
- Fixed bug in :meth:`Series.floordiv` for :class:`ArrowDtype` (:issue:`55561`)
- Fixed bug in :meth:`Series.mode` not sorting values for Arrow-backed string dtype (:issue:`55621`)
- Fixed bug in :meth:`Series.rank` for ``string[pyarrow_numpy]`` dtype (:issue:`55362`)
- Fixed bug in :meth:`Series.str.extractall` for :class:`ArrowDtype` dtype being converted to object (:issue:`53846`)
- Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)
Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/arrow/array.py
Expand Up @@ -1886,6 +1886,7 @@ def _mode(self, dropna: bool = True) -> Self:
if pa.types.is_temporal(pa_type):
most_common = most_common.cast(pa_type)

most_common = most_common.take(pc.array_sort_indices(most_common))
return type(self)(most_common)

def _maybe_convert_setitem_value(self, value):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/extension/test_arrow.py
Expand Up @@ -1474,7 +1474,7 @@ def test_quantile(data, interpolation, quantile, request):

@pytest.mark.parametrize(
"take_idx, exp_idx",
[[[0, 0, 2, 2, 4, 4], [0, 4]], [[0, 0, 0, 2, 4, 4], [0]]],
[[[0, 0, 2, 2, 4, 4], [4, 0]], [[0, 0, 0, 2, 4, 4], [0]]],
ids=["multi_mode", "single_mode"],
)
def test_mode_dropna_true(data_for_grouping, take_idx, exp_idx):
Expand All @@ -1492,7 +1492,7 @@ def test_mode_dropna_false_mode_na(data):
expected = pd.Series([None], dtype=data.dtype)
tm.assert_series_equal(result, expected)

expected = pd.Series([None, data[0]], dtype=data.dtype)
expected = pd.Series([data[0], None], dtype=data.dtype)
result = expected.mode(dropna=False)
tm.assert_series_equal(result, expected)

Expand Down
18 changes: 11 additions & 7 deletions pandas/tests/groupby/test_groupby.py
Expand Up @@ -5,11 +5,11 @@
import numpy as np
import pytest

from pandas.compat import pa_version_under7p0
from pandas.errors import (
PerformanceWarning,
SpecificationError,
)
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
Expand Down Expand Up @@ -2518,10 +2518,7 @@ def test_groupby_column_index_name_lost(func):
"infer_string",
[
False,
pytest.param(
True,
marks=pytest.mark.skipif(pa_version_under7p0, reason="arrow not installed"),
),
pytest.param(True, marks=td.skip_if_no("pyarrow")),
],
)
def test_groupby_duplicate_columns(infer_string):
Expand Down Expand Up @@ -2751,13 +2748,20 @@ def test_rolling_wrong_param_min_period():
test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum()


def test_by_column_values_with_same_starting_value():
@pytest.mark.parametrize(
"dtype",
[
object,
pytest.param("string[pyarrow_numpy]", marks=td.skip_if_no("pyarrow")),
],
)
def test_by_column_values_with_same_starting_value(dtype):
# GH29635
df = DataFrame(
{
"Name": ["Thomas", "Thomas", "Thomas John"],
"Credit": [1200, 1300, 900],
"Mood": ["sad", "happy", "happy"],
"Mood": Series(["sad", "happy", "happy"], dtype=dtype),
}
)
aggregate_details = {"Mood": Series.mode, "Credit": "sum"}
Expand Down

0 comments on commit ba40bda

Please sign in to comment.