pandas-dev · mroeschke · Oct 25, 2023 · Oct 21, 2023 · Oct 21, 2023 · Oct 22, 2023
diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst
@@ -31,6 +31,7 @@ Bug fixes
 - Fixed bug in :meth:`Index.insert` raising when inserting ``None`` into :class:`Index` with ``dtype="string[pyarrow_numpy]"`` (:issue:`55365`)
 - Fixed bug in :meth:`Series.all`  and :meth:`Series.any` not treating missing values correctly for ``dtype="string[pyarrow_numpy]"`` (:issue:`55367`)
 - Fixed bug in :meth:`Series.floordiv` for :class:`ArrowDtype` (:issue:`55561`)
+- Fixed bug in :meth:`Series.mode` not sorting values for arrow backed string dtype (:issue:`55621`)
 - Fixed bug in :meth:`Series.rank` for ``string[pyarrow_numpy]`` dtype (:issue:`55362`)
 - Fixed bug in :meth:`Series.str.extractall` for :class:`ArrowDtype` dtype being converted to object (:issue:`53846`)
 - Fixed bug where PDEP-6 warning about setting an item of an incompatible dtype was being shown when creating a new conditional column (:issue:`55025`)

diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
@@ -7,13 +7,19 @@
 try:
     import pyarrow as pa
 
+    pa_installed = True
     _palv = Version(Version(pa.__version__).base_version)
     pa_version_under10p1 = _palv < Version("10.0.1")
     pa_version_under11p0 = _palv < Version("11.0.0")
     pa_version_under12p0 = _palv < Version("12.0.0")
     pa_version_under13p0 = _palv < Version("13.0.0")
     pa_version_under14p0 = _palv < Version("14.0.0")
 except ImportError:
+    pa_installed = False
+    pa_version_under7p0 = True
+    pa_version_under8p0 = True
+    pa_version_under9p0 = True
+    pa_version_under10p0 = True
     pa_version_under10p1 = True
     pa_version_under11p0 = True
     pa_version_under12p0 = True

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -1919,6 +1919,7 @@ def _mode(self, dropna: bool = True) -> Self:
         if pa.types.is_temporal(pa_type):
             most_common = most_common.cast(pa_type)
 
+        most_common = most_common.take(pc.array_sort_indices(most_common))
         return type(self)(most_common)
 
     def _maybe_convert_setitem_value(self, value):

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -1333,7 +1333,7 @@ def test_quantile(data, interpolation, quantile, request):
 
 @pytest.mark.parametrize(
     "take_idx, exp_idx",
-    [[[0, 0, 2, 2, 4, 4], [0, 4]], [[0, 0, 0, 2, 4, 4], [0]]],
+    [[[0, 0, 2, 2, 4, 4], [4, 0]], [[0, 0, 0, 2, 4, 4], [0]]],
     ids=["multi_mode", "single_mode"],
 )
 def test_mode_dropna_true(data_for_grouping, take_idx, exp_idx):
@@ -1351,7 +1351,7 @@ def test_mode_dropna_false_mode_na(data):
     expected = pd.Series([None], dtype=data.dtype)
     tm.assert_series_equal(result, expected)
 
-    expected = pd.Series([None, data[0]], dtype=data.dtype)
+    expected = pd.Series([data[0], None], dtype=data.dtype)
     result = expected.mode(dropna=False)
     tm.assert_series_equal(result, expected)
 

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pytest
 
+from pandas.compat.pyarrow import pa_installed
 from pandas.errors import (
     PerformanceWarning,
     SpecificationError,
@@ -2541,7 +2542,10 @@ def test_groupby_column_index_name_lost(func):
     "infer_string",
     [
         False,
-        True,
+        pytest.param(
+            True,
+            marks=pytest.mark.skipif(not pa_installed, reason="arrow not installed"),
+        ),
     ],
 )
 def test_groupby_duplicate_columns(infer_string):
@@ -2773,13 +2777,23 @@ def test_rolling_wrong_param_min_period():
         test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum()
 
 
-def test_by_column_values_with_same_starting_value():
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        object,
+        pytest.param(
+            "string[pyarrow_numpy]",
+            marks=pytest.mark.skipif(not pa_installed, reason="arrow not installed"),
+        ),
+    ],
+)
+def test_by_column_values_with_same_starting_value(dtype):
     # GH29635
     df = DataFrame(
         {
             "Name": ["Thomas", "Thomas", "Thomas John"],
             "Credit": [1200, 1300, 900],
-            "Mood": ["sad", "happy", "happy"],
+            "Mood": Series(["sad", "happy", "happy"], dtype=dtype),
         }
     )
     aggregate_details = {"Mood": Series.mode, "Credit": "sum"}