Skip to content

Commit

Permalink
GH-39076: [R] Fix tests that trigger confusing dplyr warnings (#39077)
Browse files Browse the repository at this point in the history
### Rationale for this change

Running the R test suite prints many spurious dplyr warnings, which makes it difficult to spot genuine warnings.

### What changes are included in this PR?

The input data for specific tests involving `summarise()` was replaced with small inline data frames that do not trigger these warnings.

### Are these changes tested?

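Yes. The changes are themselves modifications to existing tests in `r/tests/testthat/test-dplyr-summarize.R`.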

### Are there any user-facing changes?

No

* Closes: #39076

Authored-by: Dewey Dunnington <dewey@voltrondata.com>
Signed-off-by: Dewey Dunnington <dewey@fishandwhistle.net>
  • Loading branch information
paleolimbot committed Dec 5, 2023
1 parent 3e123ad commit 2ab2c42
Showing 1 changed file with 20 additions and 24 deletions.
44 changes: 20 additions & 24 deletions r/tests/testthat/test-dplyr-summarize.R
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,8 @@ test_that("median()", {
})

test_that("quantile()", {
skip_if_not_available("dataset")

# The default method for stats::quantile() throws an error when na.rm = FALSE
# and the input contains NA or NaN, whereas the Arrow tdigest kernels return
# null in this situation. To work around this known difference, the tests
Expand Down Expand Up @@ -510,9 +512,9 @@ test_that("quantile()", {
)

# with a vector of 2+ probs
expect_warning(
Table$create(tbl) %>%
summarize(q = quantile(dbl, probs = c(0.2, 0.8), na.rm = TRUE)),
expect_error(
InMemoryDataset$create(data.frame(x = 1)) %>%
summarize(q = quantile(x, probs = c(0.2, 0.8), na.rm = TRUE)),
"quantile() with length(probs) != 1 not supported in Arrow",
fixed = TRUE
)
Expand Down Expand Up @@ -910,43 +912,37 @@ test_that("Not (yet) supported: implicit join", {

compare_dplyr_binding(
.input %>%
group_by(some_grouping) %>%
summarize(
dbl - mean(dbl)
) %>%
group_by(x) %>%
summarize(y - mean(y)) %>%
collect(),
tbl,
data.frame(x = 1, y = 2),
warning = paste(
"Expression dbl - mean\\(dbl\\) is not an aggregate expression",
"Expression y - mean\\(y\\) is not an aggregate expression",
"or is not supported in Arrow; pulling data into R"
)
)

compare_dplyr_binding(
.input %>%
group_by(some_grouping) %>%
summarize(
dbl
) %>%
group_by(x) %>%
summarize(y) %>%
collect(),
tbl,
data.frame(x = 1, y = 2),
warning = paste(
"Expression dbl is not an aggregate expression",
"Expression y is not an aggregate expression",
"or is not supported in Arrow; pulling data into R"
)
)

# This one could possibly be supported--in mutate()
compare_dplyr_binding(
.input %>%
group_by(some_grouping) %>%
summarize(
dbl - int
) %>%
group_by(x) %>%
summarize(x - y) %>%
collect(),
tbl,
data.frame(x = 1, y = 2, z = 3),
warning = paste(
"Expression dbl - int is not an aggregate expression",
"Expression x - y is not an aggregate expression",
"or is not supported in Arrow; pulling data into R"
)
)
Expand Down Expand Up @@ -1188,12 +1184,12 @@ test_that("Can use across() within summarise()", {

# across() doesn't work in summarise when input expressions evaluate to bare field references
expect_warning(
example_data %>%
data.frame(x = 1, y = 2) %>%
arrow_table() %>%
group_by(lgl) %>%
group_by(x) %>%
summarise(across(everything())) %>%
collect(),
regexp = "Expression int is not an aggregate expression or is not supported in Arrow; pulling data into R"
regexp = "Expression y is not an aggregate expression or is not supported in Arrow; pulling data into R"
)
})

Expand Down

0 comments on commit 2ab2c42

Please sign in to comment.