diff --git a/R/pkgs/covidcommon/NAMESPACE b/R/pkgs/covidcommon/NAMESPACE index 9a9ac974a..97ac2533c 100644 --- a/R/pkgs/covidcommon/NAMESPACE +++ b/R/pkgs/covidcommon/NAMESPACE @@ -9,7 +9,9 @@ export(create_prefix) export(get_CSSE_US_data) export(get_CSSE_US_matchGlobal_data) export(get_CSSE_global_data) +export(get_LA_health_dpt_county_hosp_data) export(get_USAFacts_data) +export(get_groundtruth_from_single_source) export(get_groundtruth_from_source) export(get_hhsCMU_allHosp_st_data) export(get_hhsCMU_hospCurr_st_data) diff --git a/R/pkgs/covidcommon/R/DataUtils.R b/R/pkgs/covidcommon/R/DataUtils.R index 6a3854211..4075bbbe4 100755 --- a/R/pkgs/covidcommon/R/DataUtils.R +++ b/R/pkgs/covidcommon/R/DataUtils.R @@ -707,8 +707,51 @@ get_reichlab_cty_data <- function(cum_case_filename = "data/case_data/rlab_cum_c ##' ##' @export ##' -get_groundtruth_from_source <- function(source = "csse", scale = "US county", variables = c("Confirmed", "Deaths", "incidI", "incidDeath"), - incl_unass = FALSE, get_hosp = FALSE){ +get_groundtruth_from_source <- function(source = c("csse", "csse", "csse", "csse"), scale = "US county", variables = c("Confirmed", "Deaths", "incidI", "incidDeath"), incl_unass = FALSE, adjust_for_variant = FALSE, variant_props_file = "data/variant/variant_props_long.csv", misc_data_filename = NULL) { + df <- data.frame( + data_source = source, + variables = variables + ) + + df %>% + dplyr::group_by(data_source) %>% + dplyr::group_modify(function(.x,.y){ + return(get_groundtruth_from_single_source( + source = .y$data_source, + scale = scale, + variables = .x$variables, + incl_unass = incl_unass, + adjust_for_variant = adjust_for_variant, + variant_props_file = variant_props_file, + misc_data_filename = misc_data_filename + )) + }) %>% + return() +} + +##' +##' Wrapper function to pull data from different sources +##' +##' Pulls a groundtruth dataset with the variables specified +##' +##' @param source name of data source: reichlab, usafacts, csse +##' @param scale geographic scale: US county, US state, country (csse only), complete (csse only) +##' @param variables vector that may include one or more of the following variable names: Confirmed, Deaths, incidI, incidDeath, (hhsCMU source only: incidH_confirmed, incidH_all, hospCurr_confirmed, hospCurr_all) +##' @return data frame +##' +##' @importFrom magrittr %>% +##' +##' @export +##' +get_groundtruth_from_single_source <- function(source = "csse", scale = "US county", variables = c("Confirmed", "Deaths", "incidI", "incidDeath"), incl_unass = FALSE, adjust_for_variant = FALSE, variant_props_file = "data/variant/variant_props_long.csv", misc_data_filename = NULL) { + + if(length(source) > 1) { + stop(paste( + "get_groundtruth_from_single_source only allows a single source, but", + paste(source, collapse = ", "), + "was provided" + )) + } if(source == "reichlab" & scale == "US county"){ @@ -773,25 +816,41 @@ get_groundtruth_from_source <- function(source = "csse", scale = "US county", va } else if(source == "hhsCMU" & scale == "US state"){ - rc <- get_hhsCMU_cleanHosp_st_data() + rc <- get_hhsCMU_incidH_st_data() + rc <- dplyr::mutate(rc, FIPS = paste0(FIPS, "000")) rc <- dplyr::select(rc, Update, FIPS, source, !!variables) rc <- tidyr::drop_na(rc, tidyselect::everything()) - } else{ - warning(print(paste("The combination of ", source, "and", scale, "is not valid. Returning NULL object."))) - rc <- NULL - } - - if(get_hosp & scale == "US state") { - hosp <- get_hhsCMU_incidH_st_data() - hosp <- hosp %>% dplyr::select(-FIPS) - rc <- left_join(rc, hosp) + } else if ((source == "LA health dpt") && (scale == "US county")) { + + rc <- get_LA_health_dpt_county_hosp_data(misc_data_filename) + + } else { + warning(print(paste("The combination of ", source, "and", scale, "is not valid. Returning empty tibble."))) + rc <- dplyr::as_tibble(NULL) } + return(rc) } +##' +##' Pull LA data +##' +##' @export +get_LA_health_dpt_county_hosp_data <- function(hosp_file_name = "data/LACDPH/hospitalizations/20210813.xlsx"){ + dat <- readxl::read_xlsx(hosp_file_name) %>% + dplyr::rename(incidH = incidH_covid) %>% + dplyr::mutate(incidH = dplyr::na_if(incidH, "n/a")) %>% + dplyr::mutate(date = as.Date(date), incidH = as.numeric(incidH), + FIPS = "06037") %>% + dplyr::select(Update=date, FIPS, incidH) %>% + dplyr::ungroup() + + return(dat) +} + ##' ##' Pull CSSE US data in format similar to that of global data ##' diff --git a/R/pkgs/covidcommon/tests/testthat/test-get_groundtruth_from_source.R b/R/pkgs/covidcommon/tests/testthat/test-get_groundtruth_from_source.R index 368a6db7d..d9080ecdd 100644 --- a/R/pkgs/covidcommon/tests/testthat/test-get_groundtruth_from_source.R +++ b/R/pkgs/covidcommon/tests/testthat/test-get_groundtruth_from_source.R @@ -11,7 +11,8 @@ test_that("get_groundtruth_from_source works", { csse_st_ctyonly <- get_groundtruth_from_source(source = "csse", scale = "US state", incl_unass = FALSE) fake <- get_groundtruth_from_source(source = "fakesource") - expect_null(fake) + expect_equal(nrow(fake), + 0) usaf_cty_processed <- usaf_cty %>% dplyr::mutate(FIPS = stringr::str_sub(FIPS, 1, 2)) %>% dplyr::group_by(Update, FIPS, source) %>% diff --git a/R/pkgs/inference/R/filter_MC_runner_funcs.R b/R/pkgs/inference/R/filter_MC_runner_funcs.R index 0e3bd1467..07e606adb 100644 --- a/R/pkgs/inference/R/filter_MC_runner_funcs.R +++ b/R/pkgs/inference/R/filter_MC_runner_funcs.R @@ -64,12 +64,13 @@ aggregate_and_calc_loc_likelihoods <- function( this_location_log_likelihood <- 0 for (var in names(ground_truth_data[[location]])) { + observed_indices <- !is.na(ground_truth_data[[location]][[var]]$data_var) this_location_log_likelihood <- this_location_log_likelihood + ## Actually compute likelihood for this location and statistic here: sum(inference::logLikStat( - obs = ground_truth_data[[location]][[var]]$data_var, - sim = this_location_modeled_outcome[[var]]$sim_var, + obs = ground_truth_data[[location]][[var]]$data_var[observed_indices], + sim = this_location_modeled_outcome[[var]]$sim_var[observed_indices], dist = config$filtering$statistics[[var]]$likelihood$dist, param = config$filtering$statistics[[var]]$likelihood$param, add_one = config$filtering$statistics[[var]]$add_one diff --git a/R/pkgs/inference/R/groundtruth.R b/R/pkgs/inference/R/groundtruth.R index e4a62cb8c..84896f22e 100644 --- a/R/pkgs/inference/R/groundtruth.R +++ b/R/pkgs/inference/R/groundtruth.R @@ -2,39 +2,72 @@ #' @param data_path Path where to write the data #' @param cache logical indicating whether to cache the data (default = TRUE) #' @param gt_source string indicating source of ground truth data. options include "csse" or "usafacts" (default csse) -#' @param gt_scale string indicating whether "US county" or "US state"-level data +#' @param gt_scale string indicating whether "US county" or "US state"-level data #' #' @return NULL #' #' @export -get_ground_truth_file <- function(data_path, cache = TRUE, gt_source = "csse", gt_scale = "US county") { +get_ground_truth_file <- function(data_path, cache = TRUE, gt_source = "csse", gt_scale = "US county", gt_vars = c("Confirmed", "Deaths", "incidI", "incidDeath"), new_vars = gt_vars, fips_column_name = "geoid", date_column_name = "date", misc_data_filename = NULL) { data_dir <- dirname(data_path) + if(!dir.exists(data_dir)){ suppressWarnings(dir.create(data_dir,recursive=TRUE)) } + + if (length(gt_vars) != length(new_vars)) { + stop(paste( + "groundtruth variables and new variables should have the same number of elements, got:", + "(", paste(gt_vars, collapse = ", "), ")", + "and", + "(", paste(new_vars, collapse = ", "), ")" + )) + } + + if(!isTRUE(all.equal(gt_vars, new_vars))){ + warning("new_vars is deprecated, please adjust data_var to match gt_column_name") + } + if(!(file.exists(data_path) & cache)){ message(paste("*** Loading Data from", gt_source, "\n")) - cases_deaths <- suppressMessages(covidcommon::get_groundtruth_from_source(source = gt_source, scale = gt_scale, variables = c("Confirmed", "Deaths", "incidI", "incidDeath"), incl_unass = ifelse(gt_scale == "US state", TRUE, FALSE))) - cases_deaths <- dplyr::arrange( - dplyr::rename( - dplyr::mutate( - cases_deaths, - Update = lubridate::ymd(Update) - ), - date = Update, - cumConfirmed = Confirmed, - cumDeaths = Deaths, - confirmed_incid = incidI, - death_incid = incidDeath + cases_deaths <- suppressMessages(covidcommon::get_groundtruth_from_source( + source = gt_source, + scale = gt_scale, + variables = gt_vars, + incl_unass = ifelse(gt_scale == "US state", TRUE, FALSE), + misc_data_filename = misc_data_filename + )) + cases_deaths <- dplyr::arrange( + dplyr::mutate( + cases_deaths, + Update = lubridate::ymd(Update) ), - date + Update ) - if(any(is.na(cases_deaths$cumConfirmed))){ - cases_deaths$cumConfirmed[is.na(cases_deaths$cumConfirmed)] <- 0 + + gt_vars <- c("Update", "FIPS", gt_vars) + new_vars <- c(date_column_name, fips_column_name, new_vars) + + if(!all(gt_vars %in% names(cases_deaths))) { + stop(paste( + "Could not find all expected names. Looking for", + "(", paste(gt_vars, collapse = ", "), ")", + "found", + "(", paste(names(cases_deaths), collapse = ", "), ")" + )) } - if(any(is.na(cases_deaths$cumDeaths))){ - cases_deaths$cumDeaths[is.na(cases_deaths$cumDeaths)] <- 0 + if(!all(names(cases_deaths) %in% gt_vars)) { + warning(paste( + "Found more than the expected names. Looking for", + "(", paste(gt_vars, collapse = ", "), ")", + "found", + "(", paste(names(cases_deaths), collapse = ", "), ")", + "extra", + "(", paste(names(cases_deaths)[!(names(cases_deaths) %in% gt_vars)], collapse = ", "), ")" + )) } + names(cases_deaths)[names(cases_deaths) %in% gt_vars] <- + setNames(new_vars, gt_vars)[names(cases_deaths)[names(cases_deaths) %in% gt_vars]] + readr::write_csv(cases_deaths, data_path) rm(cases_deaths) message("*** DONE Loading Data \n") @@ -49,24 +82,79 @@ get_ground_truth_file <- function(data_path, cache = TRUE, gt_source = "csse", g #' @param data_path Path where to write the data #' #' @export -get_ground_truth <- function(data_path, fips_codes, fips_column_name, start_date, end_date, cache = TRUE, gt_source = "csse", gt_scale = "US county"){ - get_ground_truth_file(data_path = data_path, cache = cache, gt_source = gt_source, gt_scale = gt_scale) +get_ground_truth <- function( + data_path, + fips_codes = NULL, + fips_column_name = "geoid", + date_column_name = "date", + start_date = NULL, + end_date = NULL, + cache = TRUE, + gt_source = "csse", + gt_scale = "US county", + gt_vars = c("Confirmed", "Deaths", "incidI", "incidDeath"), + new_vars = gt_vars, + misc_data_filename = NULL +) { - rc <- suppressMessages(readr::read_csv(data_path,col_types = list(FIPS = readr::col_character()))) - rc <- dplyr::filter( - rc, - FIPS %in% fips_codes, - date >= start_date, - date <= end_date - ) - rc <- dplyr::right_join( - rc, - tidyr::expand_grid( - FIPS = unique(rc$FIPS), - date = unique(rc$date) - ) + get_ground_truth_file( + data_path = data_path, + cache = cache, + gt_source = gt_source, + gt_scale = gt_scale, + gt_vars = gt_vars, + new_vars = new_vars, + fips_column_name = fips_column_name, + date_column_name = date_column_name, + misc_data_filename = misc_data_filename ) - rc <- dplyr::mutate_if(rc,is.numeric,dplyr::coalesce,0) - names(rc)[names(rc) == "FIPS"] <- fips_column_name + + + rc <- suppressMessages(readr::read_csv(data_path,col_types = list(FIPS = readr::col_character()))) + + if(is.null(start_date)) { + start_date <- min(rc$date) + } + + if(is.null(end_date)) { + end_date <- max(rc$date) + } + + if (is.null(fips_codes)) { + fips_codes <- unique(rc$fips_codes) + } + + if(length(start_date)!=length(gt_vars) & length(start_date)==1){ + start_date <- rep(start_date, length(gt_vars)) + } else if(length(start_date)!=length(gt_vars)){ + warning("No start date specified for at least one of the variables; the variable will be removed from the groundtruth") + } + + if(length(end_date)!=length(gt_vars) & length(end_date)==1){ + end_date <- rep(end_date, length(gt_vars)) + } else if(length(end_date)!=length(gt_vars)){ + warning("No end date specified for at least one of the variables; the variable will be removed from the groundtruth") + } + + rc <- rc %>% + dplyr::filter(!!rlang::sym(fips_column_name) %in% fips_codes) %>% + tidyr::pivot_longer(tidyselect::all_of(new_vars)) %>% + dplyr::mutate( + start_date = lubridate::ymd(start_date[match(name,new_vars)]), + end_date = lubridate::ymd(end_date[match(name,new_vars)]) + ) %>% + dplyr::filter( + start_date <= !!rlang::sym(date_column_name), + !!rlang::sym(date_column_name) <= end_date + ) %>% + tidyr::pivot_wider(names_from = name, values_from = value) %>% + dplyr::right_join( + tidyr::expand_grid( + !!rlang::sym(fips_column_name) := fips_codes + ) + ) %>% + dplyr::filter(!is.na(!!rlang::sym(date_column_name))) %>% + dplyr::mutate(geoid = !!rlang::sym(fips_column_name)) + return(rc) } diff --git a/R/pkgs/inference/tests/testthat/test-get_ground_truth_file.R b/R/pkgs/inference/tests/testthat/test-get_ground_truth_file.R index a1a92b1c6..d9bf22c11 100644 --- a/R/pkgs/inference/tests/testthat/test-get_ground_truth_file.R +++ b/R/pkgs/inference/tests/testthat/test-get_ground_truth_file.R @@ -5,10 +5,10 @@ test_that("get_ground_truth_file creates a file",{ warning("Testing should not be using a file that already exists") file.remove(data_path) } - expect_error(get_ground_truth_file(data_path,FALSE),NA) + expect_error(get_ground_truth_file(data_path,cache=FALSE),NA) expect_equal(file.exists(data_path),TRUE) - expect_error(get_ground_truth_file(data_path,TRUE),NA) + expect_error(get_ground_truth_file(data_path,cache=TRUE),NA) expect_equal(file.exists(data_path),TRUE) }) @@ -18,16 +18,14 @@ test_that("get_ground_truth returns an appropriate data frame",{ fips_column_name <- "test_fips_column" start_date <- lubridate::ymd("2020-04-15") end_date <- lubridate::ymd("2020-04-30") - expect_error({get_ground_truth(data_path,fips_codes,fips_column_name,start_date,end_date,FALSE)},NA) - expect_error({get_ground_truth(data_path,fips_codes,fips_column_name,start_date,end_date,TRUE)},NA) + new_vars <- c("cumConfirmed", "cumDeaths", "confirmed_incid","death_incid") + expect_error({get_ground_truth(data_path = data_path,fips_codes = fips_codes,fips_column_name = fips_column_name,start_date = start_date,end_date = end_date, new_vars = new_vars, cache=FALSE)},NA) + expect_error({get_ground_truth(data_path = data_path,fips_codes = fips_codes, fips_column_name = fips_column_name,start_date = start_date,end_date = end_date, new_vars = new_vars, cache=TRUE)},NA) expect_equal({ - all(c(fips_column_name,"date","confirmed_incid","death_incid", "cumConfirmed", "cumDeaths") %in% names(get_ground_truth(data_path,fips_codes,fips_column_name,start_date,end_date,TRUE))) + all(c(fips_column_name,"date","cumConfirmed", "cumDeaths", "confirmed_incid","death_incid") %in% names(get_ground_truth(data_path = data_path,fips_codes = fips_codes, fips_column_name = fips_column_name,start_date = start_date,end_date = end_date, new_vars = new_vars, cache=TRUE))) },TRUE) - expect_equal({ - all(c(fips_column_name,"date","confirmed_incid","death_incid", "cumConfirmed", "cumDeaths") %in% names(get_ground_truth(data_path,fips_codes,fips_column_name,start_date,end_date,TRUE))) - },TRUE) - expect_gt(nrow(get_ground_truth(data_path,fips_codes,fips_column_name,start_date,end_date,TRUE)),0) - expect_equal(all(get_ground_truth(data_path,fips_codes,fips_column_name,start_date,end_date,TRUE)[[fips_column_name]] %in% fips_codes),TRUE) - expect_equal(all(get_ground_truth(data_path,fips_codes,fips_column_name,start_date,end_date,TRUE)$date >= start_date),TRUE) - expect_equal(all(get_ground_truth(data_path,fips_codes,fips_column_name,start_date,end_date,TRUE)$date <= end_date),TRUE) + expect_gt(nrow(get_ground_truth(data_path = data_path,fips_codes = fips_codes, fips_column_name = fips_column_name,start_date = start_date,end_date = end_date, new_vars = new_vars,cache=TRUE)),0) + expect_equal(all(get_ground_truth(data_path = data_path,fips_codes = fips_codes, fips_column_name = fips_column_name,start_date = start_date,end_date = end_date, new_vars = new_vars,cache=TRUE)[[fips_column_name]] %in% fips_codes),TRUE) + expect_equal(all(get_ground_truth(data_path = data_path,fips_codes = fips_codes, fips_column_name = fips_column_name,start_date = start_date,end_date = end_date, new_vars = new_vars,cache=TRUE)$date >= start_date),TRUE) + expect_equal(all(get_ground_truth(data_path = data_path,fips_codes = fips_codes, fips_column_name = fips_column_name,start_date = start_date,end_date = end_date, new_vars = new_vars,cache=TRUE)$date <= end_date),TRUE) }) diff --git a/R/scripts/create_seeding.R b/R/scripts/create_seeding.R index dfefc35ec..e2f3185c3 100644 --- a/R/scripts/create_seeding.R +++ b/R/scripts/create_seeding.R @@ -73,6 +73,7 @@ if(is.null(config$filtering$gt_source)){ } else{ gt_source <- config$filtering$gt_source } + if(is.null(config$seeding$delay_incidC)){ config$seeding$delay_incidC <- 5 } @@ -80,9 +81,27 @@ if(is.null(config$seeding$ratio_incidC)){ config$seeding$ratio_incidC <- 10 } +if(is_US_run){ + state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE) + if(state_level){ + gt_scale <- "US state" + include_unassigned <- TRUE + } else{ + gt_scale <- "US county" + include_unassigned <- FALSE + } +} else{ + gt_scale <- "US county" + include_unassigned <- FALSE +} + if(!is.null(gt_source)){ - cases_deaths <- covidcommon::get_groundtruth_from_source(source = gt_source, scale = "US county") + cases_deaths <- covidcommon::get_groundtruth_from_source(source = gt_source, scale = gt_scale, incl_unass = include_unassigned) print(paste("Successfully pulled", gt_source, "data for seeding.")) + if(is_US_run){ + cases_deaths <- cases_deaths %>% + mutate(FIPS = stringr::str_pad(FIPS, width = 5, side="right", pad="0")) + } } else { data_path <- config$filtering$data_path if(is.null(data_path)){ @@ -95,19 +114,19 @@ if(!is.null(gt_source)){ print(paste("Successfully loaded data from ", data_path, "for seeding.")) } -# Aggregation to state level if in config -if(is_US_run){ - state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE) - if(state_level){ - gt_scale <- "US state" - cases_deaths <- covidcommon::get_groundtruth_from_source(source = gt_source, scale = gt_scale, incl_unass = TRUE) - } else{ - gt_scale <- "US county" - cases_deaths <- covidcommon::get_groundtruth_from_source(source = gt_source, scale = gt_scale) - } - cases_deaths <- cases_deaths %>% - mutate(FIPS = stringr::str_pad(FIPS, width = 5, side="right", pad="0")) -} +# # Aggregation to state level if in config +# if(is_US_run){ +# state_level <- ifelse(!is.null(config$spatial_setup$state_level) && config$spatial_setup$state_level, TRUE, FALSE) +# if(state_level){ +# gt_scale <- "US state" +# cases_deaths <- covidcommon::get_groundtruth_from_source(source = gt_source, scale = gt_scale, incl_unass = TRUE) +# } else{ +# gt_scale <- "US county" +# cases_deaths <- covidcommon::get_groundtruth_from_source(source = gt_source, scale = gt_scale) +# } +# cases_deaths <- cases_deaths %>% +# mutate(FIPS = stringr::str_pad(FIPS, width = 5, side="right", pad="0")) +# } ## Check some data attributes: ## This is a hack: diff --git a/R/scripts/filter_MC.R b/R/scripts/filter_MC.R index fb60f8c40..7e6976535 100644 --- a/R/scripts/filter_MC.R +++ b/R/scripts/filter_MC.R @@ -136,35 +136,58 @@ if("priors"%in%names(config$filtering)) { ## Runner Script--------------------------------------------------------------------- ## backwards compatibility with configs that don't have filtering$gt_source parameter will use the previous default data source (USA Facts) -if(is.null(config$filtering$gt_source)){ - gt_source <- "usafacts" -} else{ - gt_source <- config$filtering$gt_source + +new_vars <- unlist(sapply(config$filtering$statistics, function(x){x$data_var})) +gt_vars <- unlist(sapply(config$filtering$statistics, function(x){x$gt_column_name})) +if (is.null(gt_vars)) { # compatility with old configs + warning("Please specify gt_column_name in each inference statistic. Defaulting to incident and cumulative confirmed cases and deaths") + gt_vars <- c("Confirmed", "Deaths", "incidI", "incidDeath") + new_vars <- c("cumConfirmed", "cumDeaths", "confirmed_incid", "death_incid") +} + +gt_source <- unlist(sapply(config$filtering$statistics, function(x){x$gt_source})) +if (is.null(gt_source)) { + + if(is.null(config$filtering$gt_source)){ + warning("Please specify gt_source in each inference statistic. Defaulting to USA Facts") + gt_source <- "usafacts" + } else{ + warning("Please specify gt_source in each inference statistic. Defaulting to global filtering source") + gt_source <- config$filtering$gt_source + } } gt_scale <- ifelse(state_level, "US state", "US county") fips_codes_ <- geodata[[obs_nodename]] -gt_start_date <- lubridate::ymd(config$start_date) +gt_start_date <- unlist(sapply(config$filtering$statistics, function(x){x$start_date})) +if(is.null(gt_start_date)) { + gt_start_date <- lubridate::ymd(config$start_date) + gt_start_date <- rep(gt_start_date, times = length(gt_vars)) +} if (opt$ground_truth_start != "") { - gt_start_date <- lubridate::ymd(opt$ground_truth_start) + gt_start_date <- pmax(gt_start_date, lubridate::ymd(opt$ground_truth_start)) } else if (!is.null(config$start_date_groundtruth)) { - gt_start_date <- lubridate::ymd(config$start_date_groundtruth) -} -if (gt_start_date < lubridate::ymd(config$start_date)) { - gt_start_date <- lubridate::ymd(config$start_date) + gt_start_date <- pmax(gt_start_date, lubridate::ymd(config$start_date_groundtruth)) } -gt_end_date <- lubridate::ymd(config$end_date) +gt_end_date <- unlist(sapply(config$filtering$statistics, function(x){x$end_date})) +if(is.null(gt_end_date)) { + gt_end_date <- lubridate::ymd(config$end_date) + gt_end_date <- rep(gt_end_date, times = length(gt_vars)) +} if (opt$ground_truth_end != "") { - gt_end_date <- lubridate::ymd(opt$ground_truth_end) + gt_end_date <- pmin(gt_end_date, lubridate::ymd(opt$ground_truth_end)) } else if (!is.null(config$end_date_groundtruth)) { - gt_end_date <- lubridate::ymd(config$end_date_groundtruth) -} -if (gt_end_date > lubridate::ymd(config$end_date)) { - gt_end_date <- lubridate::ymd(config$end_date) + gt_end_date <- pmin(gt_end_date, lubridate::ymd(config$end_date_groundtruth)) } +misc_data_filename <- unlist(sapply(config$filtering$statistics, function(x){x$misc_data_filename})) +if(is.null(misc_data_filename)) { + if(!is.null(config$filtering$misc_data_filename)){ + misc_data_filename <- config$filtering$misc_data_filename + } +} obs <- inference::get_ground_truth( data_path = data_path, @@ -173,7 +196,10 @@ obs <- inference::get_ground_truth( start_date = gt_start_date, end_date = gt_end_date, gt_source = gt_source, - gt_scale = gt_scale + gt_scale = gt_scale, + gt_vars = gt_vars, + new_vars = new_vars, + misc_data_filename = misc_data_filename ) geonames <- unique(obs[[obs_nodename]])