fix problems with inconsistent merra URLs; closes #23 #28

Merged
5 commits merged on Feb 3, 2024
2 changes: 2 additions & 0 deletions .github/workflows/R-CMD-check.yaml
@@ -26,6 +26,8 @@ jobs:

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
EARTHDATA_USERNAME: ${{ secrets.EARTHDATA_USERNAME }}
EARTHDATA_PASSWORD: ${{ secrets.EARTHDATA_PASSWORD }}
R_KEEP_PKG_SOURCE: yes

steps:
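The check workflow now forwards NASA Earthdata credentials to the test run via repository secrets. For local development, the new tests (see tests/testthat/test-merra-daily.R below) read the same environment variables, optionally from a .env file; a minimal local-setup sketch, with placeholder values:

# local-setup sketch; replace the placeholder values with real NASA Earthdata credentials
Sys.setenv(
  EARTHDATA_USERNAME = "your-earthdata-username",
  EARTHDATA_PASSWORD = "your-earthdata-password"
)
# or keep them in an untracked .env file and load it, as the tests do:
if (file.exists(".env")) dotenv::load_dot_env()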
18 changes: 8 additions & 10 deletions R/aqs.R
@@ -34,12 +34,8 @@ get_daily_aqs <- function(pollutant = c("pm25", "ozone", "no2"), year = "2021")
)[pollutant]
file_name <- glue::glue("daily_{pollutant_code}_{year}.zip")
on.exit(unlink(file_name))
# TODO change to httr or httr2
utils::download.file(
url = glue::glue("https://aqs.epa.gov/aqsweb/airdata/{file_name}"),
destfile = file_name,
quiet = TRUE
)
glue::glue("https://aqs.epa.gov/aqsweb/airdata/{file_name}") |>
utils::download.file(destfile = file_name, quiet = TRUE)
unzipped_file_name <- gsub(pattern = ".zip", ".csv", file_name, fixed = TRUE)
on.exit(unlink(unzipped_file_name), add = TRUE)
utils::unzip(file_name)
@@ -64,7 +60,9 @@ get_daily_aqs <- function(pollutant = c("pm25", "ozone", "no2"), year = "2021")
return(d_out)
}

utils::globalVariables(c("Sample Duration", "Observation Percent",
"State Code", "County Code", "Site Num",
"Latitude", "Longitude", "Arithmetic Mean", "Date Local",
"lon", "lat", "conc"))
utils::globalVariables(c(
"Sample Duration", "Observation Percent",
"State Code", "County Code", "Site Num",
"Latitude", "Longitude", "Arithmetic Mean", "Date Local",
"lon", "lat", "conc"
))
2 changes: 2 additions & 0 deletions R/helper.R
@@ -1,5 +1,7 @@
.onLoad <- function(...) {
dir.create(tools::R_user_dir("appc", "data"), recursive = TRUE, showWarnings=FALSE)
options(timeout = max(900, getOption("timeout")),
download.file.method = "libcurl")
}

#' Get the geography of the 2020 contiguous United States
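These options back the switch from httr2 to utils::download.file() elsewhere in this PR: a longer timeout for large netCDF and raster downloads, plus the libcurl method for HTTPS. A quick sanity check after loading the package (a sketch; exact values depend on any options already set in the session):

library(appc)
getOption("timeout")              # at least 900 seconds after .onLoad()
getOption("download.file.method") # "libcurl"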
21 changes: 10 additions & 11 deletions R/install_released_data.R
@@ -1,5 +1,5 @@
#' Download pre-installed data from GitHub release
#'
#'
#' `install_smoke_pm_data()`, `install_merra_data()`, `install_traffic()`, and `install_nei_point_data()`
#' all download geospatial data directly from the provider and then transform or subset data into
#' smaller files to be used by appc `get_*_data()` functions.
@@ -9,7 +9,7 @@
#' These functions are utilized automatically by the geomarker assessment functions,
#' but can be called without input data to install the geomarker data ahead of time,
#' if external internet access is not possible after input data is added.
#' *Note that some of the install functions require a system installation of `gdal`.*
#' *Note that some of the install functions require a system installation of `gdal`.*
#'
#' To turn *off* the default usage of downloading
#' pre-generated data and to instead install data
@@ -20,15 +20,14 @@
#' @keywords internal
install_released_data <- function(released_data_name, package_version = utils::packageVersion("appc")) {
dest_file <- fs::path(tools::R_user_dir("appc", "data"), released_data_name)
glue::glue("https://github.com", "geomarker-io",
"appc", "releases", "download",
"v{package_version}",
released_data_name,
.sep = "/"
) |>
httr2::request() |>
httr2::req_progress() |>
httr2::req_perform(path = dest_file)
dl_url <- glue::glue(
"https://github.com", "geomarker-io",
"appc", "releases", "download",
"v{package_version}",
released_data_name,
.sep = "/"
)
utils::download.file(dl_url, dest_file, quiet = FALSE)
}

#' install_source_preference()
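The rewritten helper now builds a plain GitHub release URL and fetches it with utils::download.file() instead of httr2. A standalone sketch of the URL construction; the version and asset name below are illustrative:

# mirrors the glue() call above; values are placeholders
package_version <- "1.1.0"
released_data_name <- "merra_2023.parquet"
glue::glue(
  "https://github.com", "geomarker-io", "appc", "releases", "download",
  "v{package_version}", released_data_name,
  .sep = "/"
)
#> https://github.com/geomarker-io/appc/releases/download/v1.1.0/merra_2023.parquet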
79 changes: 43 additions & 36 deletions R/merra.R
@@ -1,5 +1,5 @@
#' Get MERRA-2 aerosol diagnostics data
#'
#'
#' Total and component (Dust, OC, BC, SS, SO4) surface PM2.5 concentrations
#' from the MERRA-2 [M2T1NXAER v5.12.4](https://disc.gsfc.nasa.gov/datasets/M2T1NXAER_5.12.4/summary) product.
#' @details
@@ -23,11 +23,11 @@
#' `merra_ss`, `merra_so4`, `merra_pm25`) with one row per date in `dates`
#' @export
#' @examples
#' # d <- list(
#' # "8841b39a7c46e25f" = as.Date(c("2023-05-18", "2023-11-06")),
#' # "8841a45555555555" = as.Date(c("2023-06-22", "2023-08-15"))
#' # )
#' # get_merra_data(x = s2::as_s2_cell(names(d)), dates = d)
#' d <- list(
#' "8841b39a7c46e25f" = as.Date(c("2023-05-18", "2023-11-06")),
#' "8841a45555555555" = as.Date(c("2023-06-22", "2023-08-15"))
#' )
#' get_merra_data(x = s2::as_s2_cell(names(d)), dates = d)
get_merra_data <- function(x, dates) {
if (!inherits(x, "s2_cell")) stop("x must be a s2_cell vector", call. = FALSE)
d_merra <-
@@ -46,7 +46,7 @@ get_merra_data <- function(x, dates) {
dplyr::mutate(s2_geography = s2::s2_cell_to_lnglat(s2)) |>
stats::na.omit() # some s2 failed to convert to lnglat ?

x_closest_merra <-
x_closest_merra <-
x |>
s2::s2_cell_to_lnglat() |>
s2::s2_closest_feature(d_merra$s2_geography)
@@ -57,13 +57,13 @@

out <-
purrr::map2(x_closest_merra$data, dates,
\(xx, dd) {
tibble::tibble(date = dd) |>
dplyr::left_join(xx, by = "date") |>
dplyr::select(-date)
},
.progress = "extracting closest merra data"
)
\(xx, dd) {
tibble::tibble(date = dd) |>
dplyr::left_join(xx, by = "date") |>
dplyr::select(-date)
},
.progress = "extracting closest merra data"
)
names(out) <- as.character(x)
return(out)
}
@@ -76,15 +76,20 @@ get_merra_data <- function(x, dates) {
install_merra_data <- function(merra_year = as.character(2016:2023)) {
merra_year <- rlang::arg_match(merra_year)
dest_file <- fs::path(tools::R_user_dir("appc", "data"),
paste0(c("merra", merra_year), collapse = "_"), ext = "parquet")
if (fs::file_exists(dest_file)) return(as.character(dest_file))
paste0(c("merra", merra_year), collapse = "_"),
ext = "parquet"
)
if (fs::file_exists(dest_file)) {
return(as.character(dest_file))
}
if (!install_source_preference()) {
install_released_data(released_data_name = glue::glue("merra_{merra_year}.parquet"))
return(as.character(dest_file))
}
date_seq <- seq(as.Date(paste(c(merra_year, "01", "01"), collapse = "-")),
as.Date(paste(c(merra_year, "12", "31"), collapse = "-")),
by = 1)
as.Date(paste(c(merra_year, "12", "31"), collapse = "-")),
by = 1
)
message(glue::glue("downloading and subsetting daily MERRA files for {merra_year}"))
# takes a long time, so cache intermediate daily downloads and extractions
merra_data <- mappp::mappp(date_seq, create_daily_merra_data, cache = TRUE, cache_name = "merra_cache")
@@ -98,7 +103,7 @@ install_merra_data <- function(merra_year = as.character(2016:2023)) {

#' `create_daily_merra_data` downloads and computes MERRA PM2.5 data for a single day
#' @return for `create_daily_merra_data()`, a tibble with columns for s2,
#' date, and concentrations of PM2.5 total, dust, oc, bc, ss, so4
#' date, and concentrations of PM2.5 total, dust, oc, bc, ss, so4
#' @export
#' @rdname get_merra_data
create_daily_merra_data <- function(merra_date) {
@@ -107,25 +112,25 @@ create_daily_merra_data <- function(merra_date) {
earthdata_secrets <- Sys.getenv(c("EARTHDATA_USERNAME", "EARTHDATA_PASSWORD"), unset = NA)
if (any(is.na(earthdata_secrets))) stop("EARTHDATA_USERNAME or EARTHDATA_PASSWORD environment variables are unset", call. = FALSE)
tf <- tempfile(fileext = ".nc4")
fs::path("https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2",
"M2T1NXAER.5.12.4",
format(the_date, "%Y"),
format(the_date, "%m"),
paste0("MERRA2_400.tavg1_2d_aer_Nx.", format(the_date, "%Y%m%d")),
ext = "nc4"
) |>
req_url <-
fs::path("https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2",
"M2T1NXAER.5.12.4",
format(the_date, "%Y"),
format(the_date, "%m"),
paste0("MERRA2_400.tavg1_2d_aer_Nx.", format(the_date, "%Y%m%d")),
ext = "nc4"
)
if ((format(the_date, "%Y") == "2020" & format(the_date, "%m") == "09") ||
(format(the_date, "%Y") == "2021" & format(the_date, "%m") %in% c("06", "07", "08", "09"))) {
req_url <- gsub("MERRA2_400.", "MERRA2_401.", req_url, fixed = TRUE)
}
req_url |>
httr2::request() |>
httr2::req_auth_basic(
username = earthdata_secrets["EARTHDATA_USERNAME"],
password = earthdata_secrets["EARTHDATA_PASSWORD"]
) |>
## httr2::req_progress() |>
httr2::req_retry(max_tries = 3) |>
## httr2::req_proxy("http://bmiproxyp.chmcres.cchmc.org",
## port = 80,
## username = Sys.getenv("CCHMC_USERNAME"),
## password = Sys.getenv("CCHMC_PASSWORD")
## ) |>
httr2::req_cache(tempdir()) |>
httr2::req_perform(path = tf)
out <-
tidync::tidync(tf) |>
@@ -150,6 +155,8 @@ create_daily_merra_data <- function(merra_date) {
return(out)
}

utils::globalVariables(c("DUSMASS25", "OCSMASS", "BCSMASS", "SSSMASS25",
"SO4SMASS", "merra_dust", "merra_oc", "merra_oc",
"merra_bc", "merra_ss", "merra_so4", "value"))
utils::globalVariables(c(
"DUSMASS25", "OCSMASS", "BCSMASS", "SSSMASS25",
"SO4SMASS", "merra_dust", "merra_oc", "merra_oc",
"merra_bc", "merra_ss", "merra_so4", "value"
))
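The core of the URL fix is the switch between the MERRA2_400 and MERRA2_401 processing streams for the affected months (September 2020 and June through September 2021). A condensed, standalone sketch of that selection logic; the helper name is hypothetical, since the package inlines this condition in create_daily_merra_data():

# hypothetical helper mirroring the condition added above
merra_stream <- function(the_date) {
  yr <- format(the_date, "%Y")
  mo <- format(the_date, "%m")
  if ((yr == "2020" && mo == "09") ||
    (yr == "2021" && mo %in% c("06", "07", "08", "09"))) {
    return("MERRA2_401")
  }
  "MERRA2_400"
}
merra_stream(as.Date("2020-09-02")) # "MERRA2_401"
merra_stream(as.Date("2023-05-23")) # "MERRA2_400"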
2 changes: 1 addition & 1 deletion R/narr.R
@@ -51,7 +51,7 @@ install_narr_data <- function(narr_var = c("air.2m", "hpbl", "acpcp", "rhum.2m",
"Datasets", "NARR", "Dailies", "monolevel",
"{narr_var}.{narr_year}.nc",
.sep = "/"
) |> # TODO replace with httr2
) |>
utils::download.file(destfile = dest_file)
return(dest_file)
}
7 changes: 4 additions & 3 deletions R/nei.R
@@ -51,9 +51,10 @@ install_nei_point_data <- function(year = c("2020", "2017")) {
}
message(glue::glue("downloading {year} NEI file"))
zip_path <- fs::path(tempdir(), glue::glue("nei_{year}.zip"))
dplyr::case_when(year == "2020" ~ "https://gaftp.epa.gov/air/nei/2020/data_summaries/Facility%20Level%20by%20Pollutant.zip",
year == "2017" ~ "https://gaftp.epa.gov/air/nei/2017/data_summaries/2017v1/2017neiJan_facility.zip") |>
httr::GET(httr::write_disk(zip_path), httr::progress(), overwrite = TRUE)
dl_url <-
dplyr::case_when(year == "2020" ~ "https://gaftp.epa.gov/air/nei/2020/data_summaries/Facility%20Level%20by%20Pollutant.zip",
year == "2017" ~ "https://gaftp.epa.gov/air/nei/2017/data_summaries/2017v1/2017neiJan_facility.zip")
utils::download.file(dl_url, zip_path, quiet = FALSE)
nei_raw_paths <- utils::unzip(zip_path, exdir = tempdir())
grep(".csv", nei_raw_paths, fixed = TRUE, value = TRUE) |>
readr::read_csv(col_types = readr::cols_only(
2 changes: 1 addition & 1 deletion R/nlcd.R
@@ -48,7 +48,7 @@ install_impervious <- function(year = as.character(c(2019, 2016, 2013, 2011, 200
message(glue::glue("downloading {year} NLCD impervious raster"))
nlcd_zip_path <- fs::path(tempdir(), glue::glue("nlcd_impervious_{year}.zip"))
glue::glue("https://s3-us-west-2.amazonaws.com/mrlc/nlcd_{year}_impervious_l48_20210604.zip") |>
httr::GET(httr::write_disk(nlcd_zip_path, overwrite = TRUE), httr::progress())
utils::download.file(nlcd_zip_path)
nlcd_raw_paths <- utils::unzip(nlcd_zip_path, exdir = tempdir())
message(glue::glue("converting {year} NLCD impervious raster"))
system2(
5 changes: 1 addition & 4 deletions R/smoke.R
@@ -22,10 +22,7 @@ install_smoke_pm_data <- function() {
}
message("downloading and installing smoke PM data from source")
tf <- tempfile()
"https://www.dropbox.com/sh/atmtfc54zuknnob/AAA7AVRQP-GoIMHpxlvfN7RBa?dl=1" |>
httr2::request() |>
httr2::req_progress() |>
httr2::req_perform(path = tf)
utils::download.file("https://www.dropbox.com/sh/atmtfc54zuknnob/AAA7AVRQP-GoIMHpxlvfN7RBa?dl=1", tf)
d_smoke <-
unz(tf, grep(".csv", utils::unzip(tf, list = TRUE)$Name, value = TRUE)) |>
readr::read_csv(
4 changes: 1 addition & 3 deletions R/traffic.R
@@ -56,9 +56,7 @@ install_traffic <- function() {
message("downloading and installing HPMS data from source")
dest_path <- tempfile(fileext = ".gdb.zip")
"https://www.arcgis.com/sharing/rest/content/items/c199f2799b724ffbacf4cafe3ee03e55/data" |>
httr2::request() |>
httr2::req_progress() |>
httr2::req_perform(path = dest_path)
utils::download.file(dest_path)
hpms_states <-
sf::st_layers(dsn = dest_path)$name |>
strsplit("_", fixed = TRUE) |>
7 changes: 6 additions & 1 deletion justfile
@@ -33,8 +33,13 @@ upload_geo_data:
gh release upload v{{pkg_version}} "{{geomarker_folder}}/hpms_f123_aadt.rds"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/nei_2020.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/nei_2017.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2023.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2017.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2018.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2019.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2020.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2021.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2022.parquet"
gh release upload v{{pkg_version}} "{{geomarker_folder}}/merra_2023.parquet"

# train grf model
train:
10 changes: 5 additions & 5 deletions man/get_merra_data.Rd

Some generated files are not rendered by default.

16 changes: 16 additions & 0 deletions tests/testthat/test-merra-daily.R
@@ -0,0 +1,16 @@
if (file.exists(".env")) dotenv::load_dot_env()
earthdata_secrets <- Sys.getenv(c("EARTHDATA_USERNAME", "EARTHDATA_PASSWORD"), unset = NA)
skip_if(any(is.na(earthdata_secrets)), message = "no earthdata credentials found")

test_that("getting daily merra from GES DISC works", {
# "normal" pattern
# merra site uses 401 instead of 400 for september 2020 dates
# merra site uses 401 instead of 400 for jun, jul, aug, sep 2021 dates
out <-
list(
create_daily_merra_data("2023-05-23"),
create_daily_merra_data(merra_date = "2020-09-02"),
create_daily_merra_data(merra_date = "2021-06-16")
)
expect_equal(sapply(out, nrow), c(4800, 4800, 4800))
})
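With credentials available (from the environment or a .env file, as above), the new daily-download test can be exercised on its own; a sketch:

# assumes the package code is loaded, e.g. with devtools::load_all()
testthat::test_file("tests/testthat/test-merra-daily.R")
# or check a single day directly:
create_daily_merra_data("2023-05-23")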
10 changes: 10 additions & 0 deletions tests/testthat/test-merra.R
@@ -0,0 +1,10 @@
test_that("get_merra_data works", {
d <- list(
"8841b39a7c46e25f" = as.Date(c("2023-05-18", "2023-11-06")),
"8841a45555555555" = as.Date(c("2023-06-22", "2023-08-15"))
)
out <- get_merra_data(x = s2::as_s2_cell(names(d)), dates = d)
expect_equal(length(out), 2)
expect_equal(lapply(out, nrow), list("8841b39a7c46e25f" = 2, "8841a45555555555" = 2))
expect_equal(out$`8841b39a7c46e25f`$merra_dust, c(1.77165780194481, 0.841950516250467))
})