Skip to content

Commit

Permalink
Deprecate fill_method and limit in pct_change APIs (#14277)
Browse files Browse the repository at this point in the history
This PR deprecated `fill_method` and `limit` in `Series.pct_change`, `DataFrame.pct_change` and `Groupby.pct_change`

This PR:
```
= 1263 failed, 98996 passed, 2069 skipped, 776 xfailed, 312 xpassed, 20 errors in 701.08s (0:11:41) =
```

On `pandas_2.0_feature_branch`:
```
= 1584 failed, 98570 passed, 2069 skipped, 776 xfailed, 312 xpassed, 20 errors in 737.24s (0:12:17) =
```
  • Loading branch information
galipremsagar committed Nov 29, 2023
1 parent 88e1978 commit 90788f2
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 31 deletions.
36 changes: 32 additions & 4 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7137,7 +7137,7 @@ def explode(self, column, ignore_index=False):
return super()._explode(column, ignore_index)

def pct_change(
self, periods=1, fill_method="ffill", limit=None, freq=None
self, periods=1, fill_method=no_default, limit=no_default, freq=None
):
"""
Calculates the percent change between sequential elements
Expand All @@ -7149,9 +7149,15 @@ def pct_change(
Periods to shift for forming percent change.
fill_method : str, default 'ffill'
How to handle NAs before computing percent changes.
.. deprecated:: 23.12
All options of `fill_method` are deprecated except `fill_method=None`.
limit : int, optional
The number of consecutive NAs to fill before stopping.
Not yet implemented.
.. deprecated:: 23.12
`limit` is deprecated.
freq : str, optional
Increment to use from time series API.
Not yet implemented.
Expand All @@ -7160,16 +7166,38 @@ def pct_change(
-------
DataFrame
"""
if limit is not None:
if limit is not no_default:
raise NotImplementedError("limit parameter not supported yet.")
if freq is not None:
raise NotImplementedError("freq parameter not supported yet.")
elif fill_method not in {"ffill", "pad", "bfill", "backfill"}:
elif fill_method not in {
no_default,
None,
"ffill",
"pad",
"bfill",
"backfill",
}:
raise ValueError(
"fill_method must be one of 'ffill', 'pad', "
"fill_method must be one of None, 'ffill', 'pad', "
"'bfill', or 'backfill'."
)

if fill_method not in (no_default, None) or limit is not no_default:
# Do not remove until pandas 3.0 support is added.
warnings.warn(
"The 'fill_method' and 'limit' keywords in "
f"{type(self).__name__}.pct_change are deprecated and will be "
"removed in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
)
if fill_method is no_default:
fill_method = "ffill"
if limit is no_default:
limit = None

with warnings.catch_warnings():
warnings.simplefilter("ignore")
data = self.fillna(method=fill_method, limit=limit)
Expand Down
38 changes: 28 additions & 10 deletions python/cudf/cudf/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from cudf._lib.sort import segmented_sort_by_key
from cudf._lib.types import size_type_dtype
from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType
from cudf.api.extensions import no_default
from cudf.api.types import is_bool_dtype, is_float_dtype, is_list_like
from cudf.core.abc import Serializable
from cudf.core.column.column import ColumnBase, arange, as_column
Expand Down Expand Up @@ -2286,7 +2287,12 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):

@_cudf_nvtx_annotate
def pct_change(
self, periods=1, fill_method="ffill", axis=0, limit=None, freq=None
self,
periods=1,
fill_method=no_default,
axis=0,
limit=no_default,
freq=None,
):
"""
Calculates the percent change between sequential elements
Expand All @@ -2298,9 +2304,15 @@ def pct_change(
Periods to shift for forming percent change.
fill_method : str, default 'ffill'
How to handle NAs before computing percent changes.
.. deprecated:: 23.12
All options of `fill_method` are deprecated except `fill_method=None`.
limit : int, optional
The number of consecutive NAs to fill before stopping.
Not yet implemented.
.. deprecated:: 23.12
`limit` is deprecated.
freq : str, optional
Increment to use from time series API.
Not yet implemented.
Expand All @@ -2312,25 +2324,31 @@ def pct_change(
"""
if not axis == 0:
raise NotImplementedError("Only axis=0 is supported.")
if limit is not None:
if limit is not no_default:
raise NotImplementedError("limit parameter not supported yet.")
if freq is not None:
raise NotImplementedError("freq parameter not supported yet.")
elif fill_method not in {"ffill", "pad", "bfill", "backfill"}:
elif fill_method not in {no_default, None, "ffill", "bfill"}:
raise ValueError(
"fill_method must be one of 'ffill', 'pad', "
"'bfill', or 'backfill'."
"fill_method must be one of 'ffill', or" "'bfill'."
)

if fill_method in ("pad", "backfill"):
alternative = "ffill" if fill_method == "pad" else "bfill"
# Do not remove until pandas 2.0 support is added.
if fill_method not in (no_default, None) or limit is not no_default:
# Do not remove until pandas 3.0 support is added.
warnings.warn(
f"{fill_method} is deprecated and will be removed in a future "
f"version. Use f{alternative} instead.",
"The 'fill_method' keyword being not None and the 'limit' keywords in "
f"{type(self).__name__}.pct_change are deprecated and will be "
"removed in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
)

if fill_method in (no_default, None):
fill_method = "ffill"
if limit is no_default:
limit = None

with warnings.catch_warnings():
warnings.simplefilter("ignore")
filled = self.fillna(method=fill_method, limit=limit)
Expand Down
36 changes: 32 additions & 4 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3569,7 +3569,7 @@ def explode(self, ignore_index=False):

@_cudf_nvtx_annotate
def pct_change(
self, periods=1, fill_method="ffill", limit=None, freq=None
self, periods=1, fill_method=no_default, limit=no_default, freq=None
):
"""
Calculates the percent change between sequential elements
Expand All @@ -3581,9 +3581,15 @@ def pct_change(
Periods to shift for forming percent change.
fill_method : str, default 'ffill'
How to handle NAs before computing percent changes.
.. deprecated:: 23.12
All options of `fill_method` are deprecated except `fill_method=None`.
limit : int, optional
The number of consecutive NAs to fill before stopping.
Not yet implemented.
.. deprecated:: 23.12
`limit` is deprecated.
freq : str, optional
Increment to use from time series API.
Not yet implemented.
Expand All @@ -3592,15 +3598,37 @@ def pct_change(
-------
Series
"""
if limit is not None:
if limit is not no_default:
raise NotImplementedError("limit parameter not supported yet.")
if freq is not None:
raise NotImplementedError("freq parameter not supported yet.")
elif fill_method not in {"ffill", "pad", "bfill", "backfill"}:
elif fill_method not in {
no_default,
None,
"ffill",
"pad",
"bfill",
"backfill",
}:
raise ValueError(
"fill_method must be one of 'ffill', 'pad', "
"fill_method must be one of None, 'ffill', 'pad', "
"'bfill', or 'backfill'."
)
if fill_method not in (no_default, None) or limit is not no_default:
# Do not remove until pandas 3.0 support is added.
warnings.warn(
"The 'fill_method' and 'limit' keywords in "
f"{type(self).__name__}.pct_change are deprecated and will be "
"removed in a future version. Either fill in any non-leading NA values prior "
"to calling pct_change or specify 'fill_method=None' to not fill NA "
"values.",
FutureWarning,
)

if fill_method is no_default:
fill_method = "ffill"
if limit is no_default:
limit = None

with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand Down
15 changes: 12 additions & 3 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
PANDAS_GE_134,
PANDAS_GE_150,
PANDAS_GE_200,
PANDAS_GE_210,
PANDAS_LT_140,
)
from cudf.api.extensions import no_default
from cudf.core.buffer.spill_manager import get_global_manager
from cudf.core.column import column
from cudf.testing import _utils as utils
Expand Down Expand Up @@ -9896,13 +9898,20 @@ def test_dataframe_rename_duplicate_column():
],
)
@pytest.mark.parametrize("periods", [-5, -2, 0, 2, 5])
@pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"])
@pytest.mark.parametrize(
"fill_method", ["ffill", "bfill", "pad", "backfill", no_default]
)
def test_dataframe_pct_change(data, periods, fill_method):
gdf = cudf.DataFrame(data)
pdf = gdf.to_pandas()

actual = gdf.pct_change(periods=periods, fill_method=fill_method)
expected = pdf.pct_change(periods=periods, fill_method=fill_method)
with expect_warning_if(fill_method is not no_default):
actual = gdf.pct_change(periods=periods, fill_method=fill_method)
with expect_warning_if(
PANDAS_GE_210
and (fill_method is not no_default or pdf.isna().any().any())
):
expected = pdf.pct_change(periods=periods, fill_method=fill_method)

assert_eq(expected, actual)

Expand Down
23 changes: 16 additions & 7 deletions python/cudf/cudf/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

import cudf
from cudf import DataFrame, Series
from cudf.api.extensions import no_default
from cudf.core._compat import (
PANDAS_GE_150,
PANDAS_LT_140,
Expand Down Expand Up @@ -3062,17 +3063,25 @@ def test_groupby_transform_maintain_index(by):
],
)
@pytest.mark.parametrize("periods", [-5, -2, 0, 2, 5])
@pytest.mark.parametrize("fill_method", ["ffill", "bfill"])
@pytest.mark.parametrize("fill_method", ["ffill", "bfill", no_default, None])
def test_groupby_pct_change(data, gkey, periods, fill_method):
gdf = cudf.DataFrame(data)
pdf = gdf.to_pandas()

actual = gdf.groupby(gkey).pct_change(
periods=periods, fill_method=fill_method
)
expected = pdf.groupby(gkey).pct_change(
periods=periods, fill_method=fill_method
)
with expect_warning_if(fill_method not in (no_default, None)):
actual = gdf.groupby(gkey).pct_change(
periods=periods, fill_method=fill_method
)
with expect_warning_if(
PANDAS_GE_210
and (
fill_method not in (no_default, None)
or (fill_method is not None and pdf.isna().any().any())
)
):
expected = pdf.groupby(gkey).pct_change(
periods=periods, fill_method=fill_method
)

assert_eq(expected, actual)

Expand Down
18 changes: 15 additions & 3 deletions python/cudf/cudf/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

import cudf
from cudf.api.extensions import no_default
from cudf.datasets import randomdata
from cudf.testing._utils import (
_create_cudf_series_float64_default,
Expand All @@ -16,6 +17,7 @@
assert_exceptions_equal,
expect_warning_if,
)
from cudf.core._compat import PANDAS_GE_210

params_dtypes = [np.int32, np.uint32, np.float32, np.float64]
methods = ["min", "max", "sum", "mean", "var", "std"]
Expand Down Expand Up @@ -356,14 +358,24 @@ def test_series_median(dtype, num_na):
],
)
@pytest.mark.parametrize("periods", range(-5, 5))
@pytest.mark.parametrize("fill_method", ["ffill", "bfill", "pad", "backfill"])
@pytest.mark.parametrize(
"fill_method", ["ffill", "bfill", "pad", "backfill", no_default, None]
)
def test_series_pct_change(data, periods, fill_method):
cs = cudf.Series(data)
ps = cs.to_pandas()

if np.abs(periods) <= len(cs):
got = cs.pct_change(periods=periods, fill_method=fill_method)
expected = ps.pct_change(periods=periods, fill_method=fill_method)
with expect_warning_if(fill_method not in (no_default, None)):
got = cs.pct_change(periods=periods, fill_method=fill_method)
with expect_warning_if(
PANDAS_GE_210
and (
fill_method not in (no_default, None)
or (fill_method is not None and ps.isna().any())
)
):
expected = ps.pct_change(periods=periods, fill_method=fill_method)
np.testing.assert_array_almost_equal(
got.to_numpy(na_value=np.nan), expected
)
Expand Down

0 comments on commit 90788f2

Please sign in to comment.