Skip to content

Commit

Permalink
tests and examples for geomarker functions
Browse files Browse the repository at this point in the history
  • Loading branch information
cole-brokamp committed Feb 2, 2024
1 parent 731fca6 commit 7311201
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 40 deletions.
78 changes: 43 additions & 35 deletions R/merra.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' Get MERRA-2 aerosol diagnostics data
#'
#'
#' Total and component (Dust, OC, BC, SS, SO4) surface PM2.5 concentrations
#' from the MERRA-2 [M2T1NXAER v5.12.4](https://disc.gsfc.nasa.gov/datasets/M2T1NXAER_5.12.4/summary) product.
#' @details
Expand All @@ -23,11 +23,11 @@
#' `merra_ss`, `merra_so4`, `merra_pm25`) with one row per date in `dates`
#' @export
#' @examples
#' # d <- list(
#' # "8841b39a7c46e25f" = as.Date(c("2023-05-18", "2023-11-06")),
#' # "8841a45555555555" = as.Date(c("2023-06-22", "2023-08-15"))
#' # )
#' # get_merra_data(x = s2::as_s2_cell(names(d)), dates = d)
#' d <- list(
#' "8841b39a7c46e25f" = as.Date(c("2023-05-18", "2023-11-06")),
#' "8841a45555555555" = as.Date(c("2023-06-22", "2023-08-15"))
#' )
#' get_merra_data(x = s2::as_s2_cell(names(d)), dates = d)
get_merra_data <- function(x, dates) {
if (!inherits(x, "s2_cell")) stop("x must be a s2_cell vector", call. = FALSE)
d_merra <-
Expand All @@ -46,7 +46,7 @@ get_merra_data <- function(x, dates) {
dplyr::mutate(s2_geography = s2::s2_cell_to_lnglat(s2)) |>
stats::na.omit() # some s2 failed to convert to lnglat ?

x_closest_merra <-
x_closest_merra <-
x |>
s2::s2_cell_to_lnglat() |>
s2::s2_closest_feature(d_merra$s2_geography)
Expand All @@ -57,13 +57,13 @@ get_merra_data <- function(x, dates) {

out <-
purrr::map2(x_closest_merra$data, dates,
\(xx, dd) {
tibble::tibble(date = dd) |>
dplyr::left_join(xx, by = "date") |>
dplyr::select(-date)
},
.progress = "extracting closest merra data"
)
\(xx, dd) {
tibble::tibble(date = dd) |>
dplyr::left_join(xx, by = "date") |>
dplyr::select(-date)
},
.progress = "extracting closest merra data"
)
names(out) <- as.character(x)
return(out)
}
Expand All @@ -76,15 +76,20 @@ get_merra_data <- function(x, dates) {
install_merra_data <- function(merra_year = as.character(2016:2023)) {
merra_year <- rlang::arg_match(merra_year)
dest_file <- fs::path(tools::R_user_dir("appc", "data"),
paste0(c("merra", merra_year), collapse = "_"), ext = "parquet")
if (fs::file_exists(dest_file)) return(as.character(dest_file))
paste0(c("merra", merra_year), collapse = "_"),
ext = "parquet"
)
if (fs::file_exists(dest_file)) {
return(as.character(dest_file))
}
if (!install_source_preference()) {
install_released_data(released_data_name = glue::glue("merra_{merra_year}.parquet"))
return(as.character(dest_file))
}
date_seq <- seq(as.Date(paste(c(merra_year, "01", "01"), collapse = "-")),
as.Date(paste(c(merra_year, "12", "31"), collapse = "-")),
by = 1)
as.Date(paste(c(merra_year, "12", "31"), collapse = "-")),
by = 1
)
message(glue::glue("downloading and subsetting daily MERRA files for {merra_year}"))
# takes a long time, so cache intermediate daily downloads and extractions
merra_data <- mappp::mappp(date_seq, create_daily_merra_data, cache = TRUE, cache_name = "merra_cache")
Expand All @@ -98,7 +103,7 @@ install_merra_data <- function(merra_year = as.character(2016:2023)) {

#' `create_daily_merra_data` downloads and computes MERRA PM2.5 data for a single day
#' @return for `create_daily_merra_data()`, a tibble with columns for s2,
#' date, and concentrations of PM2.5 total, dust, oc, bc, ss, so4
#' date, and concentrations of PM2.5 total, dust, oc, bc, ss, so4
#' @export
#' @rdname get_merra_data
create_daily_merra_data <- function(merra_date) {
Expand All @@ -107,25 +112,26 @@ create_daily_merra_data <- function(merra_date) {
earthdata_secrets <- Sys.getenv(c("EARTHDATA_USERNAME", "EARTHDATA_PASSWORD"), unset = NA)
if (any(is.na(earthdata_secrets))) stop("EARTHDATA_USERNAME or EARTHDATA_PASSWORD environment variables are unset", call. = FALSE)
tf <- tempfile(fileext = ".nc4")
fs::path("https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2",
"M2T1NXAER.5.12.4",
format(the_date, "%Y"),
format(the_date, "%m"),
paste0("MERRA2_400.tavg1_2d_aer_Nx.", format(the_date, "%Y%m%d")),
ext = "nc4"
) |>
req_url <-
fs::path("https://goldsmr4.gesdisc.eosdis.nasa.gov/data/MERRA2",
"M2T1NXAER.5.12.4",
format(the_date, "%Y"),
format(the_date, "%m"),
paste0("MERRA2_400.tavg1_2d_aer_Nx.", format(the_date, "%Y%m%d")),
ext = "nc4"
)
if ((format(the_date, "%Y") == "2020" & format(the_date, "%m") == "09") ||
(format(the_date, "%Y") == "2021" & format(the_date, "%m") %in% c("06", "07", "08", "09"))) {
req_url <- gsub("MERRA2_400.", "MERRA2_401.", req_url, fixed = TRUE)
}
req_url |>
httr2::request() |>
httr2::req_auth_basic(
username = earthdata_secrets["EARTHDATA_USERNAME"],
password = earthdata_secrets["EARTHDATA_PASSWORD"]
) |>
httr2::req_cache(tempdir()) |>
## httr2::req_progress() |>
httr2::req_retry(max_tries = 3) |>
## httr2::req_proxy("http://bmiproxyp.chmcres.cchmc.org",
## port = 80,
## username = Sys.getenv("CCHMC_USERNAME"),
## password = Sys.getenv("CCHMC_PASSWORD")
## ) |>
httr2::req_perform(path = tf)
out <-
tidync::tidync(tf) |>
Expand All @@ -150,6 +156,8 @@ create_daily_merra_data <- function(merra_date) {
return(out)
}

utils::globalVariables(c("DUSMASS25", "OCSMASS", "BCSMASS", "SSSMASS25",
"SO4SMASS", "merra_dust", "merra_oc", "merra_oc",
"merra_bc", "merra_ss", "merra_so4", "value"))
utils::globalVariables(c(
"DUSMASS25", "OCSMASS", "BCSMASS", "SSSMASS25",
"SO4SMASS", "merra_dust", "merra_oc", "merra_oc",
"merra_bc", "merra_ss", "merra_so4", "value"
))
10 changes: 5 additions & 5 deletions man/get_merra_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

80 changes: 80 additions & 0 deletions tests/testthat/_snaps/merra-daily.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# getting daily merra from GES DISC works

Code
create_daily_merra_data("2023-05-23")
Output
# A tibble: 4,800 x 7
merra_dust merra_oc merra_bc merra_ss merra_so4 merra_pm25 s2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <s2cell>
1 1.46 0.308 0.0468 3.89 0.446 6.31 81a3f339877d0c5f
2 1.38 0.328 0.0503 4.57 0.497 7.02 81a43f437c7a310d
3 1.34 0.338 0.0524 4.92 0.534 7.39 81a5c74e7884f30b
4 1.31 0.347 0.0546 5.44 0.580 7.95 81af5de48e848ceb
5 1.34 0.356 0.0565 5.97 0.623 8.58 81af21e856572593
6 1.48 0.367 0.0582 6.33 0.658 9.14 81aebd9a839ae4f3
7 1.68 0.387 0.0616 7.10 0.686 10.2 81ac157723ad238b
8 1.89 0.417 0.0667 8.11 0.717 11.5 81ac8d385d99a58d
9 2.00 0.458 0.0729 9.27 0.741 12.8 81acb54e797c8449
10 2.03 0.500 0.0791 10.3 0.747 13.9 805372bafbb0d335
# i 4,790 more rows

---

Code
create_daily_merra_data(merra_date = "2020-09-02")
Output
# A tibble: 4,800 x 7
merra_dust merra_oc merra_bc merra_ss merra_so4 merra_pm25 s2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <s2cell>
1 0.334 2.30 0.136 0.346 0.317 3.56 81a3f339877d0c5f
2 0.269 2.10 0.127 0.251 0.268 3.12 81a43f437c7a310d
3 0.224 1.75 0.109 0.344 0.217 2.72 81a5c74e7884f30b
4 0.212 1.25 0.0853 0.661 0.190 2.47 81af5de48e848ceb
5 0.258 1.00 0.0710 0.665 0.175 2.24 81af21e856572593
6 0.252 0.791 0.0561 0.543 0.148 1.85 81aebd9a839ae4f3
7 0.171 0.503 0.0392 0.582 0.109 1.45 81ac157723ad238b
8 0.105 0.605 0.0453 0.844 0.143 1.80 81ac8d385d99a58d
9 0.110 0.737 0.0519 1.11 0.209 2.30 81acb54e797c8449
10 0.168 0.706 0.0508 1.42 0.315 2.77 805372bafbb0d335
# i 4,790 more rows

---

Code
create_daily_merra_data(merra_date = "2021-06-16")
Output
# A tibble: 4,800 x 7
merra_dust merra_oc merra_bc merra_ss merra_so4 merra_pm25 s2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <s2cell>
1 0.197 0.135 0.0428 11.5 0.418 12.4 81a3f339877d0c5f
2 0.206 0.156 0.0462 11.6 0.443 12.6 81a43f437c7a310d
3 0.212 0.176 0.0490 11.7 0.462 12.8 81a5c74e7884f30b
4 0.218 0.195 0.0515 11.8 0.479 13.0 81af5de48e848ceb
5 0.228 0.211 0.0536 12.0 0.492 13.1 81af21e856572593
6 0.237 0.223 0.0552 12.1 0.500 13.3 81aebd9a839ae4f3
7 0.242 0.231 0.0564 12.1 0.504 13.3 81ac157723ad238b
8 0.238 0.231 0.0569 12.0 0.506 13.2 81ac8d385d99a58d
9 0.226 0.228 0.0570 11.7 0.508 12.9 81acb54e797c8449
10 0.215 0.224 0.0571 11.4 0.508 12.6 805372bafbb0d335
# i 4,790 more rows

---

Code
create_daily_merra_data(merra_date = "2021-05-21")
Output
# A tibble: 4,800 x 7
merra_dust merra_oc merra_bc merra_ss merra_so4 merra_pm25 s2
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <s2cell>
1 1.16 0.226 0.0491 9.67 0.388 11.6 81a3f339877d0c5f
2 1.26 0.218 0.0486 9.50 0.376 11.5 81a43f437c7a310d
3 1.35 0.222 0.0495 9.43 0.360 11.6 81a5c74e7884f30b
4 1.37 0.224 0.0503 9.43 0.339 11.5 81af5de48e848ceb
5 1.34 0.221 0.0504 9.27 0.316 11.3 81af21e856572593
6 1.33 0.224 0.0512 9.06 0.316 11.1 81aebd9a839ae4f3
7 1.33 0.227 0.0519 8.77 0.321 10.8 81ac157723ad238b
8 1.34 0.224 0.0516 8.41 0.323 10.5 81ac8d385d99a58d
9 1.34 0.219 0.0508 7.96 0.326 10.0 81acb54e797c8449
10 1.34 0.217 0.0504 7.50 0.328 9.55 805372bafbb0d335
# i 4,790 more rows

20 changes: 20 additions & 0 deletions tests/testthat/_snaps/merra.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# get_merra_data works

Code
get_merra_data(x = s2::as_s2_cell(names(d)), dates = d)
Output
$`8841b39a7c46e25f`
# A tibble: 2 x 6
merra_dust merra_oc merra_bc merra_ss merra_so4 merra_pm25
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1.77 6.84 0.532 0.994 2.43 13.5
2 0.842 2.65 0.392 0.244 2.21 7.17
$`8841a45555555555`
# A tibble: 2 x 6
merra_dust merra_oc merra_bc merra_ss merra_so4 merra_pm25
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1.34 2.52 0.327 0.356 3.71 9.64
2 1.18 2.80 0.441 0.722 5.78 13.1

19 changes: 19 additions & 0 deletions tests/testthat/test-merra-daily.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
if (file.exists(".env")) dotenv::load_dot_env()
earthdata_secrets <- Sys.getenv(c("EARTHDATA_USERNAME", "EARTHDATA_PASSWORD"), unset = NA)
skip_if(any(is.na(earthdata_secrets)), message = "no earthdata credentials found")

test_that("getting daily merra from GES DISC works", {
# plus these tests take a long time to download the raw merra data

# "normal" pattern
create_daily_merra_data("2023-05-23") |>
expect_snapshot()
# merra site uses 401 instead of 400 for september 2020 dates
create_daily_merra_data(merra_date = "2020-09-02") |>
expect_snapshot()
# merra site uses 401 instead of 400 for jun, jul, aug, sep 2021 dates
create_daily_merra_data(merra_date = "2021-06-16") |>
expect_snapshot()
create_daily_merra_data(merra_date = "2021-05-21") |>
expect_snapshot()
})
8 changes: 8 additions & 0 deletions tests/testthat/test-merra.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
test_that("get_merra_data works", {
d <- list(
"8841b39a7c46e25f" = as.Date(c("2023-05-18", "2023-11-06")),
"8841a45555555555" = as.Date(c("2023-06-22", "2023-08-15"))
)
get_merra_data(x = s2::as_s2_cell(names(d)), dates = d) |>
expect_snapshot()
})

0 comments on commit 7311201

Please sign in to comment.