Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-39584: [R] fallback to source gracefully #39587

Merged
merged 11 commits into from Jan 16, 2024
2 changes: 2 additions & 0 deletions r/inst/build_arrow_static.sh
Expand Up @@ -74,6 +74,8 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \
-DARROW_DATASET=${ARROW_DATASET:-ON} \
-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
-DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \
-DBoost_SOURCE=${Boost_SOURCE:-} \
-Dlz4_SOURCE=${lz4_SOURCE:-} \
-DARROW_FILESYSTEM=ON \
-DARROW_GCS=${ARROW_GCS:-$ARROW_DEFAULT_PARAM} \
-DARROW_JEMALLOC=${ARROW_JEMALLOC:-$ARROW_DEFAULT_PARAM} \
Expand Down
103 changes: 70 additions & 33 deletions r/tools/nixlibs.R
Expand Up @@ -96,18 +96,7 @@ try_download <- function(from_url, to_file, hush = quietly) {
!inherits(status, "try-error") && status == 0
}

download_binary <- function(lib) {
libfile <- paste0("arrow-", VERSION, ".zip")
binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip")
if (try_download(binary_url, libfile)) {
lg("Successfully retrieved libarrow (%s)", lib)
} else {
lg(
"Downloading libarrow failed for version %s (%s)\n at %s",
VERSION, lib, binary_url
)
libfile <- NULL
}
Comment on lines -99 to -110
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way that GH is rendering this change is awkward because I factored the checksumming out into a function. It might be easier to review in split diff and not unified

validate_checksum <- function(binary_url, libfile, hush = quietly) {
# Explicitly setting the env var to "false" will skip checksum validation
# e.g. in case the included checksums are stale.
skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false")
Expand All @@ -116,33 +105,66 @@ download_binary <- function(lib) {
# validate binary checksum for CRAN release only
if (!skip_checksum && dir.exists(checksum_path) && is_release ||
enforce_checksum) {
Comment on lines 106 to 107
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With this logic now we will skip the checksum (in this case, get to this function and then reply with checksum_ok == TRUE if any of these are true:

  • the skip var is set
  • it's not a release
  • the directory tools/checksums does not exist

Is that last one right? I get that we're trying to be protective here, and if there's no checksum file we will fail the checksum. But do we want to have this particular escape hatch here rather than failing the checksum (and then presumably building from source)?

Thoughts @assignUser ?

# Munge the path to the correct sha file which we include during the
# release process
checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url)
checksum_file <- file.path(checksum_path, checksum_file)
checksum_cmd <- "shasum"
checksum_args <- c("--status", "-a", "512", "-c", checksum_file)

# shasum is not available on all linux versions
status_shasum <- try(
suppressWarnings(
system2("shasum", args = c("--help"), stdout = FALSE, stderr = FALSE)
),
silent = TRUE
)

if (inherits(status_shasum, "try-error") || is.integer(status_shasum) && status_shasum != 0) {
checksum_cmd <- "sha512sum"
checksum_args <- c("--status", "-c", checksum_file)
# Try `shasum`, and if that doesn't work, fall back to `sha512sum` if not found
# system2 doesn't generate an R error, so we can't use a tryCatch to
# move from shasum to sha512sum.
# The warnings from system2 if it fails pop up later in the log and thus are
# more confusing than they are helpful (so we suppress them)
checksum_ok <- suppressWarnings(system2(
"shasum",
args = c("--status", "-a", "512", "-c", checksum_file),
stdout = ifelse(quietly, FALSE, ""),
stderr = ifelse(quietly, FALSE, "")
)) == 0

if (!checksum_ok) {
checksum_ok <- suppressWarnings(system2(
"sha512sum",
args = c("--status", "-c", checksum_file),
stdout = ifelse(quietly, FALSE, ""),
stderr = ifelse(quietly, FALSE, "")
)) == 0
}
assignUser marked this conversation as resolved.
Show resolved Hide resolved

checksum_ok <- system2(checksum_cmd, args = checksum_args)

if (checksum_ok != 0) {
lg("Checksum validation failed for libarrow: %s/%s", lib, libfile)
unlink(libfile)
libfile <- NULL
if (checksum_ok) {
lg("Checksum validated successfully for libarrow")
} else {
lg("Checksum validated successfully for libarrow: %s/%s", lib, libfile)
lg("Checksum validation failed for libarrow")
unlink(libfile)
}
} else {
checksum_ok <- TRUE
}

# Return whether the checksum was successful
checksum_ok
}

download_binary <- function(lib) {
libfile <- paste0("arrow-", VERSION, ".zip")
binary_url <- paste0(arrow_repo, "bin/", lib, "/arrow-", VERSION, ".zip")
if (try_download(binary_url, libfile) && validate_checksum(binary_url, libfile)) {
lg("Successfully retrieved libarrow (%s)", lib)
} else {
# If the download or checksum fail, we will set libfile to NULL this will
# normally result in a source build after this.
# TODO: should we condense these together and only call them when verbose?
lg(
"Unable to retrieve libarrow for version %s (%s)",
VERSION, lib
)
if (!quietly) {
lg(
"Attempted to download the libarrow binary from: %s",
binary_url
)
}
libfile <- NULL
}

libfile
Expand Down Expand Up @@ -464,7 +486,7 @@ env_vars_as_string <- function(env_var_list) {
stopifnot(
length(env_var_list) == length(names(env_var_list)),
all(grepl("^[^0-9]", names(env_var_list))),
all(grepl("^[A-Z0-9_]+$", names(env_var_list))),
all(grepl("^[a-zA-Z0-9_]+$", names(env_var_list))),
!any(grepl("'", env_var_list, fixed = TRUE))
)
env_var_string <- paste0(names(env_var_list), "='", env_var_list, "'", collapse = " ")
Expand Down Expand Up @@ -539,10 +561,25 @@ build_libarrow <- function(src_dir, dst_dir) {
env_var_list <- c(env_var_list, ARROW_DEPENDENCY_SOURCE = "BUNDLED")
}

# On macOS, if not otherwise set, let's override Boost_SOURCE to be bundled
assignUser marked this conversation as resolved.
Show resolved Hide resolved
if (on_macos) {
# Using lowercase (e.g. Boost_SOURCE) to match the cmake args we use already.
deps_to_bundle <- c("Boost", "lz4")
for (dep_to_bundle in deps_to_bundle) {
env_var <- paste0(dep_to_bundle, "_SOURCE")
if (Sys.getenv(env_var) == "") {
jonkeane marked this conversation as resolved.
Show resolved Hide resolved
env_var_list <- c(env_var_list, setNames("BUNDLED", env_var))
}
}
}

env_var_list <- with_cloud_support(env_var_list)

# turn_off_all_optional_features() needs to happen after
# with_cloud_support(), since it might turn features ON.
# TODO: could we, should we have download_ok also include "download has succeeded"
# so we can disable these things if someone's machine is suddenly offline but they
# find themselves here and demand a working compilation?
assignUser marked this conversation as resolved.
Show resolved Hide resolved
thirdparty_deps_unavailable <- !download_ok &&
!dir.exists(thirdparty_dependency_dir) &&
!env_is("ARROW_DEPENDENCY_SOURCE", "system")
Expand Down