
Add support for pandas-2.2 in cudf #15100

Merged on Feb 26, 2024 (35 commits). The diff below shows changes from 8 commits.

Commits:
5262a4e  Upgrade to pandas-2.2 (galipremsagar, Feb 21, 2024)
1e77124  Merge remote-tracking branch 'upstream/branch-24.04' into pandas_22_u… (galipremsagar, Feb 21, 2024)
32c1c0f  cleanup more (galipremsagar, Feb 21, 2024)
6daba82  Do more cleanup (galipremsagar, Feb 21, 2024)
00f1bbe  merge (galipremsagar, Feb 21, 2024)
f42676d  isort (galipremsagar, Feb 21, 2024)
a6c78ec  cleanup a bit more (galipremsagar, Feb 21, 2024)
ec80e9a  fix a flaky pytest (galipremsagar, Feb 21, 2024)
2c00c59  Merge remote-tracking branch 'upstream/branch-24.04' into pandas_22_u… (galipremsagar, Feb 21, 2024)
04309a6  Handle cython function or methods for cython>3.0 (galipremsagar, Feb 21, 2024)
d15323a  Merge branch 'branch-24.04' into pandas_22_upgrade (galipremsagar, Feb 21, 2024)
7fcc521  Update fast_slow_proxy.py (galipremsagar, Feb 21, 2024)
7da7100  Merge branch 'branch-24.04' into pandas_22_upgrade (galipremsagar, Feb 21, 2024)
82b0644  Merge remote-tracking branch 'upstream/branch-24.04' into pandas_22_u… (galipremsagar, Feb 22, 2024)
391b602  skip segfaulting tests (galipremsagar, Feb 22, 2024)
1b8a1e5  Update run.sh (galipremsagar, Feb 22, 2024)
ecd9221  add timeouts (galipremsagar, Feb 22, 2024)
fa72dd8  add pytest timeout (galipremsagar, Feb 22, 2024)
65c8652  Merge branch 'branch-24.04' into pandas_22_upgrade (galipremsagar, Feb 23, 2024)
07d57f6  Merge remote-tracking branch 'upstream/branch-24.04' into pandas_22_u… (galipremsagar, Feb 23, 2024)
adcd271  ignore (galipremsagar, Feb 23, 2024)
3efb4f8  fix script (galipremsagar, Feb 23, 2024)
cae3b0d  Update run-pandas-tests.sh (galipremsagar, Feb 23, 2024)
ffa1edc  Merge branch 'branch-24.04' into pandas_22_upgrade (galipremsagar, Feb 23, 2024)
d50ed04  Apply suggestions from code review (galipremsagar, Feb 23, 2024)
415de9b  Merge branch 'branch-24.04' into pandas_22_upgrade (galipremsagar, Feb 23, 2024)
ca9933d  test (galipremsagar, Feb 23, 2024)
340a4de  Merge branch 'pandas_22_upgrade' of https://github.com/galipremsagar/… (galipremsagar, Feb 23, 2024)
65bba60  Update run-pandas-tests.sh (galipremsagar, Feb 24, 2024)
a926ad1  Merge branch 'branch-24.04' into pandas_22_upgrade (galipremsagar, Feb 24, 2024)
328bc95  test (galipremsagar, Feb 26, 2024)
4f76e9f  Merge branch 'pandas_22_upgrade' of https://github.com/galipremsagar/… (galipremsagar, Feb 26, 2024)
62cf147  disable pandas-tests temporarily (galipremsagar, Feb 26, 2024)
fb8924b  Merge remote-tracking branch 'upstream/branch-24.04' into pandas_22_u… (galipremsagar, Feb 26, 2024)
8159ac2  upgrade to pandas-2.2.1 (galipremsagar, Feb 26, 2024)
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
@@ -65,7 +65,7 @@ dependencies:
- nvcomp==3.0.5
- nvtx>=0.2.1
- packaging
- pandas>=2.0,<2.1.5dev0
Contributor:
Is this the right pinning, or should we go with <2.3.0dev0? Do we expect patch releases to break us?

Contributor (author):

I feel various fixes are going into 2.2.1 that may have an impact on us, but would defer to @mroeschke on that.

Contributor:

Yeah, I fixed various issues @galipremsagar found in pandas 2.2.1 (coming out next week). There are xfails for those, so the update will break the test suite. We should probably relax the pin in a follow-up PR once it's out.

- pandas>=2.0,<2.2.1dev0
- pandoc
- pip
- pre-commit
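For reference, the semantics of the pin being discussed can be checked with the `packaging` library. This is a standalone sketch, not part of the diff; it only illustrates which pandas releases the `>=2.0,<2.2.1dev0` range admits:

```python
from packaging.specifiers import SpecifierSet

# The pin used in this PR: everything from 2.0 up to, but excluding, 2.2.1.
spec = SpecifierSet(">=2.0,<2.2.1dev0")

print("2.2.0" in spec)  # 2.2.0 satisfies the range
print("2.2.1" in spec)  # 2.2.1 (with its fixes and xfail-breaking changes) does not
print("2.3.0" in spec)  # nor does 2.3.0
```

The `<2.2.1dev0` upper bound excludes 2.2.1 and all of its pre-releases, which matches the reviewer's concern that 2.2.1 would break the test suite until the xfails are updated.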
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-122_arch-x86_64.yaml
@@ -63,7 +63,7 @@ dependencies:
- nvcomp==3.0.5
- nvtx>=0.2.1
- packaging
- pandas>=2.0,<2.1.5dev0
- pandas>=2.0,<2.2.1dev0
- pandoc
- pip
- pre-commit
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
@@ -80,7 +80,7 @@ requirements:
- {{ pin_compatible('protobuf', min_pin='x.x', max_pin='x') }}
- python
- typing_extensions >=4.0.0
- pandas >=2.0,<2.1.5dev0
- pandas >=2.0,<2.2.1dev0
- cupy >=12.0.0
- numba >=0.57
- numpy >=1.21
2 changes: 1 addition & 1 deletion dependencies.yaml
@@ -497,7 +497,7 @@ dependencies:
packages:
- fsspec>=0.6.0
- *numpy
- pandas>=2.0,<2.1.5dev0
- pandas>=2.0,<2.2.1dev0
run_cudf:
common:
- output_types: [conda, requirements, pyproject]
1 change: 0 additions & 1 deletion python/cudf/cudf/core/_compat.py
@@ -9,7 +9,6 @@
PANDAS_GE_201 = PANDAS_VERSION >= version.parse("2.0.1")
PANDAS_GE_210 = PANDAS_VERSION >= version.parse("2.1.0")
PANDAS_GE_214 = PANDAS_VERSION >= version.parse("2.1.4")
PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0")
PANDAS_LT_203 = PANDAS_VERSION < version.parse("2.0.3")
PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0")
Contributor:

Can we sort these so we have all the EQ, then GE, then LT? It looks like you deleted a duplicate line, but that ended up breaking up the groupings.

Contributor (author):

Yup, I'll do that in a follow-up PR. There is potentially still some cleanup to be done.

PANDAS_LT_300 = PANDAS_VERSION < version.parse("3.0.0")
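The flags in `_compat.py` follow one simple pattern: compare the installed pandas version against parsed thresholds once, at import time, and gate behavior on the resulting module-level booleans. A self-contained sketch of that pattern (using a hard-coded version string as a stand-in for the real `pandas.__version__`):

```python
from packaging import version

# Stand-in for pandas.__version__; cudf reads this from the installed pandas.
PANDAS_VERSION = version.parse("2.2.1")

PANDAS_GE_210 = PANDAS_VERSION >= version.parse("2.1.0")
PANDAS_GE_220 = PANDAS_VERSION >= version.parse("2.2.0")
PANDAS_LT_300 = PANDAS_VERSION < version.parse("3.0.0")

# Call sites elsewhere branch on these booleans instead of re-parsing versions.
print(PANDAS_GE_220, PANDAS_LT_300)
```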
13 changes: 2 additions & 11 deletions python/cudf/cudf/core/column/datetime.py
@@ -23,7 +23,7 @@
ScalarLike,
)
from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype
from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_220
from cudf.core._compat import PANDAS_GE_220
from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
from cudf.core.column import ColumnBase, as_column, column, string
from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
@@ -324,17 +324,8 @@ def to_pandas(
# `copy=True` workaround until following issue is fixed:
# https://issues.apache.org/jira/browse/ARROW-9772

if PANDAS_GE_200:
host_values = self.to_arrow()
else:
# Pandas<2.0 supports only `datetime64[ns]`, hence the cast.
host_values = self.astype("datetime64[ns]").to_arrow()

# Pandas only supports `datetime64[ns]` dtype
# and conversion to this type is necessary to make
# arrow to pandas conversion happen for large values.
return pd.Series(
host_values,
self.to_arrow(),
Contributor:

Just noting that in the future we should pass pandas a numpy array instead of a pyarrow array, since pandas.Series treats pyarrow arrays as objects.

copy=True,
dtype=self.dtype,
index=index,
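The review note above is about how the `pandas.Series` constructor interprets its input: a numpy datetime array is adopted with its native dtype, whereas a pyarrow array is consumed as a generic iterable and lands as object dtype unless an explicit `dtype=` is given, which is why the diff keeps `dtype=self.dtype`. A small numpy-only illustration (plain pandas, not cudf):

```python
import numpy as np
import pandas as pd

# A numpy datetime64 array becomes a Series with its native dtype;
# no explicit dtype= is needed.
arr = np.array(["2000-01-01", "2000-01-02"], dtype="datetime64[ns]")
s = pd.Series(arr)

# A pyarrow Array, by contrast, is not recognized natively by the Series
# constructor and would fall back to object dtype without dtype=.
print(s.dtype)
```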
12 changes: 1 addition & 11 deletions python/cudf/cudf/core/column/timedelta.py
@@ -14,7 +14,6 @@
from cudf import _lib as libcudf
from cudf._typing import ColumnBinaryOperand, DatetimeLikeScalar, Dtype
from cudf.api.types import is_scalar, is_timedelta64_dtype
from cudf.core._compat import PANDAS_GE_200
from cudf.core.buffer import Buffer, acquire_spill_lock
from cudf.core.column import ColumnBase, column, string
from cudf.utils.dtypes import np_to_pa_dtype
@@ -153,20 +152,11 @@ def to_pandas(
# `copy=True` workaround until following issue is fixed:
# https://issues.apache.org/jira/browse/ARROW-9772

if PANDAS_GE_200:
host_values = self.to_arrow()
else:
# Pandas<2.0 supports only `timedelta64[ns]`, hence the cast.
host_values = self.astype("timedelta64[ns]").to_arrow()

# Pandas only supports `timedelta64[ns]` dtype
# and conversion to this type is necessary to make
# arrow to pandas conversion happen for large values.
if nullable:
raise NotImplementedError(f"{nullable=} is not implemented.")

return pd.Series(
host_values,
self.to_arrow(),
copy=True,
dtype=self.dtype,
index=index,
9 changes: 1 addition & 8 deletions python/cudf/cudf/core/dataframe.py
@@ -56,7 +56,7 @@
is_string_dtype,
)
from cudf.core import column, df_protocol, indexing_utils, reshape
from cudf.core._compat import PANDAS_GE_200, PANDAS_LT_300
from cudf.core._compat import PANDAS_LT_300
from cudf.core.abc import Serializable
from cudf.core.column import (
CategoricalColumn,
@@ -1338,13 +1338,6 @@ def __getitem__(self, arg):
mask = arg
if is_list_like(mask):
dtype = None
if len(mask) == 0 and not PANDAS_GE_200:
# An explicit dtype is needed to avoid pandas
# warnings from empty sets of columns. This
# shouldn't be needed in pandas 2.0, we don't
# need to specify a dtype when we know we're not
# trying to match any columns so the default is fine.
dtype = "float64"
mask = pd.Series(mask, dtype=dtype)
if mask.dtype == "bool":
return self._apply_boolean_mask(BooleanMask(mask, len(self)))
17 changes: 4 additions & 13 deletions python/cudf/cudf/core/index.py
@@ -39,7 +39,7 @@
is_signed_integer_dtype,
)
from cudf.core._base_index import BaseIndex
from cudf.core._compat import PANDAS_GE_200, PANDAS_LT_300
from cudf.core._compat import PANDAS_LT_300
from cudf.core.column import (
CategoricalColumn,
ColumnBase,
@@ -2098,23 +2098,14 @@ def to_pandas(self, *, nullable: bool = False) -> pd.DatetimeIndex:
if nullable:
raise NotImplementedError(f"{nullable=} is not implemented.")

if PANDAS_GE_200:
nanos = self._values
else:
# no need to convert to nanos with Pandas 2.x
if isinstance(self.dtype, pd.DatetimeTZDtype):
nanos = self._values.astype(
pd.DatetimeTZDtype("ns", self.dtype.tz)
)
else:
nanos = self._values.astype("datetime64[ns]")

freq = (
self._freq._maybe_as_fast_pandas_offset()
if self._freq is not None
else None
)
return pd.DatetimeIndex(nanos.to_pandas(), name=self.name, freq=freq)
return pd.DatetimeIndex(
self._values.to_pandas(), name=self.name, freq=freq
)

@_cudf_nvtx_annotate
def _get_dt_field(self, field):
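The `freq` handling retained above mirrors plain pandas, where a `DatetimeIndex` can carry a frequency that is passed through on reconstruction. A pandas-only sketch of that round-trip (the names here are illustrative, not cudf's):

```python
import pandas as pd

# Build an index that carries a frequency, then reconstruct it the way the
# diff does: from the data, a name, and an explicit freq.
idx = pd.date_range("2000-01-01", periods=3, freq="D")
rebuilt = pd.DatetimeIndex(idx, name="ts", freq=idx.freq)

print(rebuilt.freq, rebuilt.name)
```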
6 changes: 3 additions & 3 deletions python/cudf/cudf/tests/indexes/test_interval.py
@@ -5,9 +5,9 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220
from cudf.core._compat import PANDAS_GE_210
from cudf.core.index import IntervalIndex, interval_range
from cudf.testing._utils import assert_eq, expect_warning_if
from cudf.testing._utils import assert_eq


def test_interval_constructor_default_closed():
@@ -142,7 +142,7 @@ def test_interval_range_periods_basic_dtype(start_t, end_t, periods_t):
def test_interval_range_periods_warnings():
start_val, end_val, periods_val = 0, 4, 1.0

with expect_warning_if(PANDAS_GE_220):
with pytest.warns(FutureWarning):
pindex = pd.interval_range(
start=start_val, end=end_val, periods=periods_val, closed="left"
)
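The hunk above replaces cudf's `expect_warning_if` helper with a plain `pytest.warns`, since the warning is unconditional once pandas 2.2 is the minimum version. For context, the helper's behavior can be approximated without pytest; this is a sketch of the idea, not cudf's actual implementation:

```python
import contextlib
import warnings

@contextlib.contextmanager
def expect_warning_if(condition, category=FutureWarning):
    """Require a warning of `category` inside the block iff `condition` is true."""
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        yield
    raised = any(issubclass(w.category, category) for w in record)
    if condition:
        assert raised, f"expected a {category.__name__}"

# When the condition holds, the block must emit the warning:
with expect_warning_if(True):
    warnings.warn("deprecated soon", FutureWarning)
```

When the condition is false the helper simply lets the block run, which is what made it useful for version-gated warnings; with pandas 2.2 as the floor the condition is always true, so `pytest.warns` suffices.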
53 changes: 2 additions & 51 deletions python/cudf/cudf/tests/test_array_ufunc.py
@@ -10,7 +10,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_210, PANDAS_LT_300
from cudf.core._compat import PANDAS_GE_210, PANDAS_LT_300
from cudf.testing._utils import (
assert_eq,
expect_warning_if,
@@ -183,10 +183,7 @@ def test_ufunc_series(request, ufunc, has_nulls, indexed):

request.applymarker(
pytest.mark.xfail(
condition=PANDAS_GE_200
and fname.startswith("bitwise")
and indexed
and has_nulls,
condition=fname.startswith("bitwise") and indexed and has_nulls,
reason="https://github.com/pandas-dev/pandas/issues/52500",
)
)
@@ -385,52 +382,6 @@ def test_ufunc_dataframe(request, ufunc, has_nulls, indexed):
reason=f"cupy has no support for '{fname}'",
)
)
request.applymarker(
pytest.mark.xfail(
condition=(
not PANDAS_GE_200
and indexed
in {
"add",
"arctan2",
"bitwise_and",
"bitwise_or",
"bitwise_xor",
"copysign",
"divide",
"divmod",
"float_power",
"floor_divide",
"fmax",
"fmin",
"fmod",
"heaviside",
"gcd",
"hypot",
"lcm",
"ldexp",
"left_shift",
"logaddexp",
"logaddexp2",
"logical_and",
"logical_or",
"logical_xor",
"maximum",
"minimum",
"multiply",
"nextafter",
"power",
"remainder",
"right_shift",
"subtract",
}
),
reason=(
"pandas<2.0 does not currently support misaligned "
"indexes in DataFrames"
),
)
)

N = 100
# Avoid zeros in either array to skip division by 0 errors. Also limit the
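The large xfail block deleted above existed because, per its reason string, pandas before 2.0 could not handle misaligned indexes in DataFrame ufuncs. With pandas 2.x as the floor, numpy ufuncs align their pandas operands instead, so the marker is dead code. A plain-pandas illustration of the behavior the deletion relies on (this assumes pandas >= 2.0):

```python
import numpy as np
import pandas as pd

df1 = pd.DataFrame({"x": [1, 2]}, index=[0, 1])
df2 = pd.DataFrame({"x": [10, 20]}, index=[1, 2])

# pandas aligns the two indexes before applying the ufunc; positions present
# in only one operand become NaN, and only index 1 is shared here.
result = np.add(df1, df2)
print(result)
```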
41 changes: 2 additions & 39 deletions python/cudf/cudf/tests/test_binops.py
@@ -1726,24 +1726,7 @@ def test_datetime_dateoffset_binaryop(
reason="https://github.com/pandas-dev/pandas/issues/57448",
)
)
request.applymarker(
pytest.mark.xfail(
not PANDAS_GE_220
and dtype in {"datetime64[ms]", "datetime64[s]"}
and frequency in ("microseconds", "nanoseconds")
and n_periods != 0,
reason="https://github.com/pandas-dev/pandas/pull/55595",
)
)
request.applymarker(
pytest.mark.xfail(
not PANDAS_GE_220
and dtype == "datetime64[us]"
and frequency == "nanoseconds"
and n_periods != 0,
reason="https://github.com/pandas-dev/pandas/pull/55595",
)
)

date_col = [
"2000-01-01 00:00:00.012345678",
"2000-01-31 00:00:00.012345678",
@@ -1833,27 +1816,7 @@ def test_datetime_dateoffset_binaryop_multiple(request, date_col, kwargs, op):
"dtype",
["datetime64[ns]", "datetime64[us]", "datetime64[ms]", "datetime64[s]"],
)
def test_datetime_dateoffset_binaryop_reflected(
request, n_periods, frequency, dtype
):
request.applymarker(
pytest.mark.xfail(
not PANDAS_GE_220
and dtype in {"datetime64[ms]", "datetime64[s]"}
and frequency in ("microseconds", "nanoseconds")
and n_periods != 0,
reason="https://github.com/pandas-dev/pandas/pull/55595",
)
)
request.applymarker(
pytest.mark.xfail(
not PANDAS_GE_220
and dtype == "datetime64[us]"
and frequency == "nanoseconds"
and n_periods != 0,
reason="https://github.com/pandas-dev/pandas/pull/55595",
)
)
def test_datetime_dateoffset_binaryop_reflected(n_periods, frequency, dtype):
date_col = [
"2000-01-01 00:00:00.012345678",
"2000-01-31 00:00:00.012345678",
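The tests above exercise binary operations between datetime columns and `pd.DateOffset`, including the reflected form (`offset + series`). In plain pandas the semantics look like this, with month arithmetic clamping to the end of shorter months:

```python
import pandas as pd

dates = pd.Series(pd.to_datetime(["2000-01-31", "2000-02-29"]))
offset = pd.DateOffset(months=1)

shifted = dates + offset    # forward form
reflected = offset + dates  # reflected form gives the same result

# Jan 31 + 1 month clamps to Feb 29 (2000 is a leap year).
print(shifted.iloc[0])
```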
3 changes: 1 addition & 2 deletions python/cudf/cudf/tests/test_column_accessor.py
@@ -5,7 +5,6 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_GE_200
from cudf.core.column_accessor import ColumnAccessor
from cudf.testing._utils import assert_eq

@@ -60,7 +59,7 @@ def test_to_pandas_simple(simple_data):
assert_eq(
ca.to_pandas_index(),
pd.DataFrame(simple_data).columns,
exact=not PANDAS_GE_200,
exact=False,
)

