New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-39584: [R] fallback to source gracefully #39587
Changes from 10 commits
50e2924
e24fcd3
97c3787
97e806e
a8f7578
5acf772
bdf08ec
e2e8658
5d3508c
c530814
79d5b94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -96,18 +96,7 @@ try_download <- function(from_url, to_file, hush = quietly) { | |
!inherits(status, "try-error") && status == 0 | ||
} | ||
|
||
download_binary <- function(lib) { | ||
libfile <- paste0("arrow-", VERSION, ".zip") | ||
binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") | ||
if (try_download(binary_url, libfile)) { | ||
lg("Successfully retrieved libarrow (%s)", lib) | ||
} else { | ||
lg( | ||
"Downloading libarrow failed for version %s (%s)\n at %s", | ||
VERSION, lib, binary_url | ||
) | ||
libfile <- NULL | ||
} | ||
validate_checksum <- function(binary_url, libfile, hush = quietly) { | ||
# Explicitly setting the env var to "false" will skip checksum validation | ||
# e.g. in case the included checksums are stale. | ||
skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false") | ||
|
@@ -116,33 +105,66 @@ download_binary <- function(lib) { | |
# validate binary checksum for CRAN release only | ||
if (!skip_checksum && dir.exists(checksum_path) && is_release || | ||
enforce_checksum) { | ||
Comment on lines
106
to
107
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With this logic now we will skip the checksum (in this case, get to this function and then reply with
Is that last one right? I get that we're trying to be protective here, and if there's no checksum file we will fail the checksum. But do we want to have this particular escape hatch here rather than failing the checksum (and then presumably building from source)? Thoughts @assignUser ? |
||
# Munge the path to the correct sha file which we include during the | ||
# release process | ||
checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url) | ||
checksum_file <- file.path(checksum_path, checksum_file) | ||
checksum_cmd <- "shasum" | ||
checksum_args <- c("--status", "-a", "512", "-c", checksum_file) | ||
|
||
# shasum is not available on all linux versions | ||
status_shasum <- try( | ||
suppressWarnings( | ||
system2("shasum", args = c("--help"), stdout = FALSE, stderr = FALSE) | ||
), | ||
silent = TRUE | ||
) | ||
|
||
if (inherits(status_shasum, "try-error") || is.integer(status_shasum) && status_shasum != 0) { | ||
checksum_cmd <- "sha512sum" | ||
checksum_args <- c("--status", "-c", checksum_file) | ||
# Try `shasum`, and if that doesn't work, fall back to `sha512sum` if not found | ||
# system2 doesn't generate an R error, so we can't use a tryCatch to | ||
# move from shasum to sha512sum. | ||
# The warnings from system2 if it fails pop up later in the log and thus are | ||
# more confusing than they are helpful (so we suppress them) | ||
checksum_ok <- suppressWarnings(system2( | ||
"shasum", | ||
args = c("--status", "-a", "512", "-c", checksum_file), | ||
stdout = ifelse(quietly, FALSE, ""), | ||
stderr = ifelse(quietly, FALSE, "") | ||
)) == 0 | ||
|
||
if (!checksum_ok) { | ||
checksum_ok <- suppressWarnings(system2( | ||
"sha512sum", | ||
args = c("--status", "-c", checksum_file), | ||
stdout = ifelse(quietly, FALSE, ""), | ||
stderr = ifelse(quietly, FALSE, "") | ||
)) == 0 | ||
} | ||
assignUser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
checksum_ok <- system2(checksum_cmd, args = checksum_args) | ||
|
||
if (checksum_ok != 0) { | ||
lg("Checksum validation failed for libarrow: %s/%s", lib, libfile) | ||
unlink(libfile) | ||
libfile <- NULL | ||
if (checksum_ok) { | ||
lg("Checksum validated successfully for libarrow") | ||
} else { | ||
lg("Checksum validated successfully for libarrow: %s/%s", lib, libfile) | ||
lg("Checksum validation failed for libarrow") | ||
unlink(libfile) | ||
} | ||
} else { | ||
checksum_ok <- TRUE | ||
} | ||
|
||
# Return whether the checksum was successful | ||
checksum_ok | ||
} | ||
|
||
download_binary <- function(lib) { | ||
libfile <- paste0("arrow-", VERSION, ".zip") | ||
binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip") | ||
if (try_download(binary_url, libfile) && validate_checksum(binary_url, libfile)) { | ||
lg("Successfully retrieved libarrow (%s)", lib) | ||
} else { | ||
# If the download or checksum fail, we will set libfile to NULL this will | ||
# normally result in a source build after this. | ||
# TODO: should we condense these together and only call them when verbose? | ||
lg( | ||
"Unable to retrieve libarrow for version %s (%s)", | ||
VERSION, lib | ||
) | ||
if (!quietly) { | ||
lg( | ||
"Attempted to download the libarrow binary from: %s", | ||
binary_url | ||
) | ||
} | ||
libfile <- NULL | ||
} | ||
|
||
libfile | ||
|
@@ -464,7 +486,7 @@ env_vars_as_string <- function(env_var_list) { | |
stopifnot( | ||
length(env_var_list) == length(names(env_var_list)), | ||
all(grepl("^[^0-9]", names(env_var_list))), | ||
all(grepl("^[A-Z0-9_]+$", names(env_var_list))), | ||
all(grepl("^[a-zA-Z0-9_]+$", names(env_var_list))), | ||
!any(grepl("'", env_var_list, fixed = TRUE)) | ||
) | ||
env_var_string <- paste0(names(env_var_list), "='", env_var_list, "'", collapse = " ") | ||
|
@@ -539,10 +561,25 @@ build_libarrow <- function(src_dir, dst_dir) { | |
env_var_list <- c(env_var_list, ARROW_DEPENDENCY_SOURCE = "BUNDLED") | ||
} | ||
|
||
# On macOS, if not otherwise set, let's override Boost_SOURCE to be bundled | ||
assignUser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (on_macos) { | ||
# Using lowercase (e.g. Boost_SOURCE) to match the cmake args we use already. | ||
deps_to_bundle <- c("Boost", "lz4") | ||
for (dep_to_bundle in deps_to_bundle) { | ||
env_var <- paste0(dep_to_bundle, "_SOURCE") | ||
if (Sys.getenv(env_var) == "") { | ||
jonkeane marked this conversation as resolved.
Show resolved
Hide resolved
|
||
env_var_list <- c(env_var_list, setNames("BUNDLED", env_var)) | ||
} | ||
} | ||
} | ||
|
||
env_var_list <- with_cloud_support(env_var_list) | ||
|
||
# turn_off_all_optional_features() needs to happen after | ||
# with_cloud_support(), since it might turn features ON. | ||
# TODO: could we, should we have download_ok also include "download has succeeded" | ||
# so we can disable these things if someone's machine is suddenly offline but they | ||
# find themselves here and demand a working compilation? | ||
assignUser marked this conversation as resolved.
Show resolved
Hide resolved
|
||
thirdparty_deps_unavailable <- !download_ok && | ||
!dir.exists(thirdparty_dependency_dir) && | ||
!env_is("ARROW_DEPENDENCY_SOURCE", "system") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The way that GH is rendering this change is awkward because I factored the checksumming out into a function. It might be easier to review in split diff and not unified