Skip to content

Commit

Permalink
Fix accessing .columns issue (#15212)
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Mar 4, 2024
1 parent 8dbe7cb commit 903dcac
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 30 deletions.
4 changes: 3 additions & 1 deletion python/cudf/cudf/_lib/utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ cpdef generate_pandas_metadata(table, index):
col
for col in table._columns
],
df=table,
# It is okay to pass `.head(0).to_pandas()` here because
# this method only extracts the `.columns` metadata from `df`.
df=table.head(0).to_pandas(),
column_names=col_names,
index_levels=index_levels,
index_descriptors=index_descriptors,
Expand Down
7 changes: 6 additions & 1 deletion python/cudf/cudf/core/indexed_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2872,6 +2872,8 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self:
self._column_names,
None if has_range_index or not keep_index else self._index.names,
)
result._data.label_dtype = self._data.label_dtype
result._data.rangeindex = self._data.rangeindex

if keep_index and has_range_index:
result.index = self.index[start:stop]
Expand Down Expand Up @@ -3053,7 +3055,7 @@ def duplicated(self, subset=None, keep="first"):

@_cudf_nvtx_annotate
def _empty_like(self, keep_index=True) -> Self:
return self._from_columns_like_self(
result = self._from_columns_like_self(
libcudf.copying.columns_empty_like(
[
*(self._index._data.columns if keep_index else ()),
Expand All @@ -3063,6 +3065,9 @@ def _empty_like(self, keep_index=True) -> Self:
self._column_names,
self._index.names if keep_index else None,
)
result._data.label_dtype = self._data.label_dtype
result._data.rangeindex = self._data.rangeindex
return result

def _split(self, splits, keep_index=True):
if self._num_rows == 0:
Expand Down
55 changes: 27 additions & 28 deletions python/cudf/cudf/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3012,43 +3012,31 @@ def test_series_rename():
@pytest.mark.parametrize("data_type", dtypes)
@pytest.mark.parametrize("nelem", [0, 100])
def test_head_tail(nelem, data_type):
def check_index_equality(left, right):
assert left.index.equals(right.index)

def check_values_equality(left, right):
if len(left) == 0 and len(right) == 0:
return None

np.testing.assert_array_equal(left.to_pandas(), right.to_pandas())

def check_frame_series_equality(left, right):
check_index_equality(left, right)
check_values_equality(left, right)

gdf = cudf.DataFrame(
pdf = pd.DataFrame(
{
"a": np.random.randint(0, 1000, nelem).astype(data_type),
"b": np.random.randint(0, 1000, nelem).astype(data_type),
}
)
gdf = cudf.from_pandas(pdf)

check_frame_series_equality(gdf.head(), gdf[:5])
check_frame_series_equality(gdf.head(3), gdf[:3])
check_frame_series_equality(gdf.head(-2), gdf[:-2])
check_frame_series_equality(gdf.head(0), gdf[0:0])
assert_eq(gdf.head(), pdf.head())
assert_eq(gdf.head(3), pdf.head(3))
assert_eq(gdf.head(-2), pdf.head(-2))
assert_eq(gdf.head(0), pdf.head(0))

check_frame_series_equality(gdf["a"].head(), gdf["a"][:5])
check_frame_series_equality(gdf["a"].head(3), gdf["a"][:3])
check_frame_series_equality(gdf["a"].head(-2), gdf["a"][:-2])
assert_eq(gdf["a"].head(), pdf["a"].head())
assert_eq(gdf["a"].head(3), pdf["a"].head(3))
assert_eq(gdf["a"].head(-2), pdf["a"].head(-2))

check_frame_series_equality(gdf.tail(), gdf[-5:])
check_frame_series_equality(gdf.tail(3), gdf[-3:])
check_frame_series_equality(gdf.tail(-2), gdf[2:])
check_frame_series_equality(gdf.tail(0), gdf[0:0])
assert_eq(gdf.tail(), pdf.tail())
assert_eq(gdf.tail(3), pdf.tail(3))
assert_eq(gdf.tail(-2), pdf.tail(-2))
assert_eq(gdf.tail(0), pdf.tail(0))

check_frame_series_equality(gdf["a"].tail(), gdf["a"][-5:])
check_frame_series_equality(gdf["a"].tail(3), gdf["a"][-3:])
check_frame_series_equality(gdf["a"].tail(-2), gdf["a"][2:])
assert_eq(gdf["a"].tail(), pdf["a"].tail())
assert_eq(gdf["a"].tail(3), pdf["a"].tail(3))
assert_eq(gdf["a"].tail(-2), pdf["a"].tail(-2))


def test_tail_for_string():
Expand Down Expand Up @@ -4328,6 +4316,17 @@ def test_one_row_head():
assert_eq(head_pdf, head_gdf)


@pytest.mark.parametrize("index", [None, [123], ["a", "b"]])
def test_no_cols_head(index):
    """Check that ``head()`` on a frame without columns matches pandas.

    The frame is built from ``index`` alone (no columns), mirrored into
    cudf via ``cudf.from_pandas``, and the default ``head()`` result of
    both libraries is compared with ``assert_eq``.
    """
    pandas_frame = pd.DataFrame(index=index)
    cudf_head = cudf.from_pandas(pandas_frame).head()

    assert_eq(pandas_frame.head(), cudf_head)


@pytest.mark.parametrize("dtype", ALL_TYPES)
@pytest.mark.parametrize(
"np_dtype,pd_dtype",
Expand Down

0 comments on commit 903dcac

Please sign in to comment.