New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: mode not sorting values for arrow backed strings #55621
Changes from 5 commits
6815bbd
699b0bd
b047558
69826ec
50f5ddb
78ada02
024e9b0
fe1bc37
fdb022a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
import numpy as np | ||
import pytest | ||
|
||
from pandas.compat.pyarrow import pa_installed | ||
from pandas.errors import ( | ||
PerformanceWarning, | ||
SpecificationError, | ||
|
@@ -2541,7 +2542,10 @@ def test_groupby_column_index_name_lost(func): | |
"infer_string", | ||
[ | ||
False, | ||
True, | ||
pytest.param( | ||
True, | ||
marks=pytest.mark.skipif(not pa_installed, reason="arrow not installed"), | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just realized we have a […] (inline code reference lost in extraction)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this work inside of pytest.param as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should, yeah. It returns a […] (inline code reference lost in extraction)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, thx
||
), | ||
], | ||
) | ||
def test_groupby_duplicate_columns(infer_string): | ||
|
@@ -2773,13 +2777,23 @@ def test_rolling_wrong_param_min_period(): | |
test_df.groupby("name")["val"].rolling(window=2, min_period=1).sum() | ||
|
||
|
||
def test_by_column_values_with_same_starting_value(): | ||
@pytest.mark.parametrize( | ||
"dtype", | ||
[ | ||
object, | ||
pytest.param( | ||
"string[pyarrow_numpy]", | ||
marks=pytest.mark.skipif(not pa_installed, reason="arrow not installed"), | ||
), | ||
], | ||
) | ||
def test_by_column_values_with_same_starting_value(dtype): | ||
# GH29635 | ||
df = DataFrame( | ||
{ | ||
"Name": ["Thomas", "Thomas", "Thomas John"], | ||
"Credit": [1200, 1300, 900], | ||
"Mood": ["sad", "happy", "happy"], | ||
"Mood": Series(["sad", "happy", "happy"], dtype=dtype), | ||
} | ||
) | ||
aggregate_details = {"Mood": Series.mode, "Credit": "sum"} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do we need these?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These got in while merging main, sorry.