From 19af737dbcfb9c77f09fcbad8132f3138528718e Mon Sep 17 00:00:00 2001 From: Mike Dietze Date: Mon, 19 Jun 2023 10:41:58 -0400 Subject: [PATCH 1/7] update reanalysis to generalizations in forecast, --- .../05B_SDA_Workflow_NA.reanalysis.R | 72 +++++++++++----- .../05C_SDA_Workflow_NA.forecast.R | 85 +++++++++++++------ .../inst/hf_landscape/05_SDA_Workflow_NA.R | 13 +-- .../inst/hf_landscape/06_cron.Rmd | 14 +++ 4 files changed, 132 insertions(+), 52 deletions(-) diff --git a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R index 66ea9181fda..a0af1b23f4b 100644 --- a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R +++ b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R @@ -1,22 +1,57 @@ ## Reanalysis helper script around 05_SDA_Workflow_NA + +########### CONFIGURATION ################### + ## original start date was 2022-05-17 -runDays <- seq(as.Date("2022-05-20"), as.Date("2023-05-11"), by="days") +runDays <- seq(as.Date("2023-05-30"), as.Date("2023-06-13"), by="days") FORCE = FALSE ## should we overwrite previously completed runs +## forecast configuration +projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/" ## main folder +set = readRDS(file.path(projectdir,"pecan.RDS")) +pecanhome = "/home/dietze/pecan" ## directory where pecan is installed + +## S3 bucket for output +source(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/minio_secrets.R")) +## minio_secrets contains the following: +#minio_key <- Sys.getenv("MINIO_ACCESS_KEY", "username") +#minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "password") +minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu") +minio_port <- Sys.getenv("MINIO_PORT", "9000") +minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none") + +################# Initial configuration (one time): ############################ +## * update local paths (uncomment, run once, recomment) +# set$outdir = projectdir +# for(i in seq_along(set$pfts)){ +# set$pfts[[i]]$posterior.files = file.path(projectdir,"pfts",basename(set$pfts[[i]]$posterior.files)) +# } +# set$model$binary = file.path(projectdir,"model",basename(set$model$binary)) +# set$model$jobtemplate = file.path(projectdir,"template.job") +# for(i in seq_along(set$run)){ +# set$run[[i]]$inputs$pft.site = file.path(projectdir,"site_pft.csv") +# set$run[[i]]$inputs$poolinitcond$path = file.path(projectdir,"IC",basename(set$run[[i]]$inputs$poolinitcond$path)) +# set$run[[i]]$inputs$met$path = file.path(projectdir,"GEFS") +# } +# saveRDS(set,file=file.path(projectdir,"pecan.RDS")) +## * update set$database$bety$host +## * set up separate cron jobs for input prep (met, constraints) + +########### RUN REANALYSIS #################### for (s in seq_along(runDays)) { ## did we do this run already? - now = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FNA",runDays[s]) + now = file.path(projectdir,paste0("FNA",runDays[s])) print(now) this.out = dir(file.path(now,"out"),full.names = TRUE) - if(length(this.out) > 0 & !FORCE) break + if(length(this.out) > 0 & !FORCE) next ## find previous run yesterday = runDays[s] - lubridate::days(1) - NoMet = read.csv("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/NO_MET",header=FALSE)[,1] + NoMet = read.csv(file.path(projectdir,"NO_MET"),header=FALSE)[,1] while(as.character(yesterday) %in% NoMet & yesterday - runDays[s] < lubridate::days(35) ){ yesterday = yesterday - lubridate::days(1) } - prev = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FNA",yesterday,"/") + prev = file.path(projectdir,paste0("FNA",yesterday,"/")) if(dir.exists(prev)){ ## is there output there? prev.out = dir(file.path(prev,"out"),full.names = TRUE) @@ -25,8 +60,10 @@ for (s in seq_along(runDays)) { if(min(prev.files)>0){ ######### RUN FORECAST ######## - msg = system2("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R", - paste("--start.date",runDays[s],"--prev",prev), + msg = system2(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R"), + paste("--start.date",runDays[s], + "--prev",prev, + "--settings",file.path(projectdir,"pecan.RDS")), wait=TRUE, stdout="stdout.log", stderr="stderr.log") @@ -44,17 +81,7 @@ for (s in seq_along(runDays)) { ########################################## ## push output to minio in EFI standard ## ########################################## - -## minio settings and helper functions - -source("~/pecan/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R") -source("~/pecan/modules/assim.sequential/inst/hf_landscape/minio_secrets.R") -## minio_secrets contains the following: -#minio_key <- Sys.getenv("MINIO_ACCESS_KEY", "username") -#minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "password") -minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu") -minio_port <- Sys.getenv("MINIO_PORT", "9000") -minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none") +source(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R")) # helper function for minio URIs minio_path <- function(...) paste(minio_arrow_bucket, ..., sep = "/") minio_uri <- function(...) { @@ -66,13 +93,13 @@ minio_uri_public <- function(...) { sprintf(template, minio_path(...), minio_host, ":", minio_port) } -runDays <- seq(as.Date("2022-05-18"), as.Date("2023-05-11"), by="days") +runDays <- seq(as.Date("2022-11-25"), as.Date("2023-06-13"), by="days") ## loop over dates FORCE = FALSE for (s in seq_along(runDays)) { ## did we do this run already? - now = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FNA",runDays[s]) + now = file.path(projectdir,paste0("FNA",runDays[s])) print(now) this.out = dir(file.path(now,"out"),full.names = TRUE) if(length(this.out) == 0){ @@ -115,3 +142,8 @@ for (s in seq_along(runDays)) { out %>% dplyr::group_by(reference_datetime) %>% arrow::write_dataset(minio_uri(),format="parquet") } + +## list set of uploaded dates +submitted = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% + dplyr::distinct(reference_datetime) %>% dplyr::collect() %>% as.vector() + diff --git a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R index dd6e15b88b8..34eaceb7976 100644 --- a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R +++ b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R @@ -1,13 +1,42 @@ ## Forecast helper script around 05_SDA_Workflow_NA -runDays = Sys.Date() + +## forecast configuration +projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/" ## main folder +set = readRDS(file.path(projectdir,"pecan.RDS")) +pecanhome = "/home/dietze/pecan" ## directory where pecan is installed + +## S3 bucket for output +minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu") +minio_port <- Sys.getenv("MINIO_PORT", "9000") +minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none") + +################# Initial configuration (one time): ############################ +## * update local paths (uncomment, run once, recomment) + # set$outdir = projectdir + # for(i in seq_along(set$pfts)){ + # set$pfts[[i]]$posterior.files = file.path(projectdir,"pfts",basename(set$pfts[[i]]$posterior.files)) + # } + # set$model$binary = file.path(projectdir,"model",basename(set$model$binary)) + # set$model$jobtemplate = file.path(projectdir,"template.job") + # for(i in seq_along(set$run)){ + # set$run[[i]]$inputs$pft.site = file.path(projectdir,"site_pft.csv") + # set$run[[i]]$inputs$poolinitcond$path = file.path(projectdir,"IC",basename(set$run[[i]]$inputs$poolinitcond$path)) + # set$run[[i]]$inputs$met$path = file.path(projectdir,"GEFS") + # } + # saveRDS(set,file=file.path(projectdir,"pecan.RDS")) +## * update set$database$bety$host +## * set up separate cron jobs for input prep (met, constraints) + +################### TODAY'S SETTINGS ######################## +runDays = Sys.Date() ## for test case set this to 2022-05-22 FORCE = FALSE ## should we overwrite previously completed runs ## check for missed days start_date = runDays success = FALSE -NoMet = read.csv("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/NO_MET",header=FALSE)[,1] +NoMet = read.csv(file.path(projectdir,"NO_MET"),header=FALSE)[,1] while(!success & runDays - start_date < lubridate::days(35) ){ - this.out = dir(file.path(paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/FNA",start_date),"out"),full.names = TRUE) + this.out = dir(file.path(paste0(projectdir,"/FNA",start_date),"out"),full.names = TRUE) if(length(this.out) > 0 & !FORCE) { ## this day ran successfully success = TRUE break @@ -16,21 +45,20 @@ while(!success & runDays - start_date < lubridate::days(35) ){ } runDays = seq(from=start_date,to=runDays,by="1 day") -## run forecast +##################### run forecast ##################### for (s in seq_along(runDays)) { ## did we do this run already? - now = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/FNA",runDays[s]) + now = paste0(projectdir,"/FNA",runDays[s]) print(now) this.out = dir(file.path(now,"out"),full.names = TRUE) - if(length(this.out) > 0 & !FORCE) break + if(length(this.out) > 0 & !FORCE) next ## find previous run yesterday = runDays[s] - lubridate::days(1) - NoMet = read.csv("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/NO_MET",header=FALSE)[,1] while(as.character(yesterday) %in% NoMet & yesterday - runDays[s] < lubridate::days(35) ){ yesterday = yesterday - lubridate::days(1) } - prev = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/FNA",yesterday) + prev = paste0(projectdir,"/FNA",yesterday) if(dir.exists(prev)){ ## is there output there? prev.out = dir(file.path(prev,"out"),full.names = TRUE) @@ -39,8 +67,10 @@ for (s in seq_along(runDays)) { if(min(prev.files)>0){ ######### RUN FORECAST ######## - msg = system2("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R", - paste("--start.date",runDays[s],"--prev",prev), + msg = system2(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R"), + paste("--start.date",runDays[s], + "--prev",prev, + "--settings",file.path(projectdir,"pecan.RDS")), wait=TRUE, stdout="stdout.log", stderr="stderr.log") @@ -48,8 +78,8 @@ for (s in seq_along(runDays)) { } } else { break } - } else {mc("ls minio") - ## previous run didn't occur + } else { + print("previous run didn't occur") break } @@ -60,14 +90,7 @@ for (s in seq_along(runDays)) { ########################################## ## minio settings and helper functions -source("~/pecan/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R") -source("~/pecan/modules/assim.sequential/inst/hf_landscape/minio_secrets.R") -## minio_secrets contains the following: -#minio_key <- Sys.getenv("MINIO_ACCESS_KEY", "username") -#minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "password") -minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu") -minio_port <- Sys.getenv("MINIO_PORT", "9000") -minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none") +source(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R")) # helper function for minio URIs minio_path <- function(...) paste(minio_arrow_bucket, ..., sep = "/") minio_uri <- function(...) { @@ -82,7 +105,7 @@ minio_uri_public <- function(...) { ## loop over dates for (s in seq_along(runDays)) { ## did we do this run already? - now = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/FNA",runDays[s]) + now = paste0(projectdir,"/FNA",runDays[s]) print(now) this.out = dir(file.path(now,"out"),full.names = TRUE) if(length(this.out) == 0){ @@ -91,12 +114,14 @@ for (s in seq_along(runDays)) { } ## did we write this run to minio already? - ens = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% - dplyr::filter(lubridate::as_datetime(reference_datetime) == runDays[s]) %>% - dplyr::distinct(parameter) %>% dplyr::collect() - if(length(ens$parameter)>0) { - print(paste("skipping",length(ens$parameter))) - next + if(!FORCE){ ## if not overwriting + ens = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% + dplyr::filter(lubridate::as_datetime(reference_datetime) == runDays[s]) %>% + dplyr::distinct(parameter) %>% dplyr::collect() + if(length(ens$parameter)>0) { + print(paste("skipping",length(ens$parameter))) + next + } } ## identify runs in the output folder @@ -112,6 +137,12 @@ for (s in seq_along(runDays)) { start_date = runDays[s]) } out = dplyr::bind_rows(out) + if(!is.numeric(nrow(out)) | nrow(out) == 0) next ## don't insert empty days into minio + out = out %>% relocate(parameter) %>% + relocate(site_id) %>% + relocate(time_bounds) %>% rename(datetime=time_bounds) %>% + relocate(reference_datetime) + out = tidyr::pivot_longer(out,5:ncol(out),names_to = "variable",values_to = "prediction") ## push to container in parquet format out %>% dplyr::group_by(reference_datetime) %>% arrow::write_dataset(minio_uri(),format="parquet") diff --git a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R index 6c73d37cdb2..0a4e183e14a 100755 --- a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R +++ b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R @@ -32,7 +32,8 @@ option_list = list(optparse::make_option("--start.date", default = Sys.Date(), type="character"), optparse::make_option("--prev", - default = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FNA",Sys.Date()-lubridate::days(1)), + type="character"), + optparse::make_option("--settings", type="character") ) args <- optparse::parse_args(optparse::OptionParser(option_list = option_list)) @@ -45,18 +46,18 @@ start.date = lubridate::as_date(args$start.date) #------------------------------------------------------------------------------------------------ restart <- list() restart$filepath <- args$prev -set = readRDS("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/pecan.RDS") +set = readRDS(args$settings) #set met.start & met.end end.date <- start.date + lubridate::days(35) sda.start = start.date + lubridate::days(1) +projectdir = set$outdir # -------------------------------------------------------------------------------------------------- #---------------------------------------------- NA DATA ------------------------------------- # -------------------------------------------------------------------------------------------------- #initialize obs.mean/cov NAs -## TODO: Alexis's version had two dates, need to take a closer list at what dates these should be set to site.ids <- papply(set,function(x)(x$run$site$id)) %>% unlist() %>% as.character() nsite = length(site.ids) @@ -135,7 +136,9 @@ dir.create(set$modeloutdir) dir.create(set$pfts$pft$outdir) #manually add in clim files -met_paths <- list.files(path = file.path("/projectnb/dietzelab/ahelgeso/NOAA_met_data_CH1/noaa_clim/HARV", start.date), full.names = TRUE, pattern = ".clim") +path = "/projectnb/dietzelab/ahelgeso/NOAA_met_data_CH1/noaa_clim/HARV/" ## hack +met_paths <- list.files(path = file.path(path, start.date), full.names = TRUE, pattern = ".clim") +#met_paths <- list.files(path = file.path(settings$run$settings.1$inputs$met$path, start.date), full.names = TRUE, pattern = ".clim") if(is_empty(met_paths)){ print(paste("SKIPPING: NO MET FOR",start.date)) cat(as.character(start.date),sep="\n",file=file.path(dirname(set$outdir),"NO_MET"),append=TRUE) ## add to list of dates missing met @@ -162,7 +165,7 @@ for(s in seq_along(set)){ } ## job.sh -set$model$jobtemplate = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/template.job" +if(is.null(set$model$jobtemplate)) set$model$jobtemplate = file.path(projectdir,"template.job") #save restart object save(restart, next.oldir, args, file = file.path(set$outdir, "restart.Rdata")) diff --git a/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd b/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd index 31041cde112..62ee2efec99 100644 --- a/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd +++ b/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd @@ -5,7 +5,21 @@ output: html_notebook ```{r} library(cronR) +``` + + +## Hello World +```{r} + +``` +## Met forecast (taking over for Alexis) +```{r} + +``` + + +```{r} ### NO ASSIMILATION FORECAST cmd <- cronR::cron_rscript("~/pecan/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R") cron_add(command = cmd, frequency = 'daily', at='4AM', id = 'HARV_FNA', description = "Harvard Forest landscape forecast, No Assimilated data") From 2b86b1b7e0321846eb0c9d1502f525df4e7f6fb6 Mon Sep 17 00:00:00 2001 From: Mike Dietze Date: Mon, 10 Jul 2023 10:44:12 -0400 Subject: [PATCH 2/7] tweaks to get HF landscape forecast restarted and cron jobs working. --- .../R/Analysis_sda_multiSite.R | 93 ++++++++++--------- .../assim.sequential/R/sda.enkf_MultiSite.R | 13 +-- .../05B_SDA_Workflow_NA.reanalysis.R | 6 +- .../05C_SDA_Workflow_NA.forecast.R | 2 + .../inst/hf_landscape/05_SDA_Workflow_NA.R | 21 +++-- .../inst/hf_landscape/06_cron.Rmd | 29 +++++- .../inst/hf_landscape/PEcAn2EFI.R | 5 +- modules/assim.sequential/man/MCMC_function.Rd | 4 +- modules/data.atmosphere/NAMESPACE | 1 + .../R/download_noaa_gefs_efi.R | 12 ++- ..._GEFS_EFI.Rd => download.NOAA_GEFS_EFI.Rd} | 12 ++- scripts/HARV_metdownload_efi.R | 82 +++++++++------- 12 files changed, 173 insertions(+), 107 deletions(-) rename modules/data.atmosphere/man/{download_NOAA_GEFS_EFI.Rd => download.NOAA_GEFS_EFI.Rd} (70%) mode change 100644 => 100755 scripts/HARV_metdownload_efi.R diff --git a/modules/assim.sequential/R/Analysis_sda_multiSite.R b/modules/assim.sequential/R/Analysis_sda_multiSite.R index 4b4c67f92c4..bdba0fd3e92 100644 --- a/modules/assim.sequential/R/Analysis_sda_multiSite.R +++ b/modules/assim.sequential/R/Analysis_sda_multiSite.R @@ -313,8 +313,9 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){ ceiling(elements.W.Data/length(var.names))], settings$state.data.assimilation$scalef %>% as.numeric() ) - } - + } + if(is.null(aqq)) PEcAn.logger::logger.severe("aqq undefined") + ### create matrix the describes the support for each observed state variable at time t interval <- matrix(NA, length(unlist(obs.mean[[t]])), 2) @@ -346,7 +347,7 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){ #### from the interval matrix y.ind <- as.numeric(Y > interval[, 1]) y.censored <- as.numeric(ifelse(Y > interval[, 1], Y, 0)) - data <- list(elements.W.Data = elements.W.Data, + MCMC_data <- list(elements.W.Data = elements.W.Data, X = X, Pf = Pf, aqq = aqq, @@ -359,8 +360,14 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){ nitr.GEF = extraArg$nitr.GEF, nburnin = extraArg$nburnin, nthin = extraArg$nthin, - monitors = c("Xall", "qq")) - outputs <- furrr::future_map(lapply(rep("data", as.numeric(settings$state.data.assimilation$chains)), get), MCMC_function) + monitors = c("Xall", "qq"), + t=t) + tmp = list() + for(i in seq_len(as.numeric(settings$state.data.assimilation$chains))){ + tmp[[i]] = MCMC_data + } + MCMC_data = tmp + outputs <- furrr::future_map(MCMC_data, MCMC_function) dat <- do.call(rbind, outputs) #---- Saving the chains @@ -372,6 +379,7 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){ Pa <- stats::cov(dat[, iX]) Pa[is.na(Pa)] <- 0 mq <- dat[, grep("q", colnames(dat))] # Omega, Precision + if(is.null(dim(mq))) mq = matrix(mq,ncol=1) q.bar <- matrix(apply(mq, 2, mean), length(elements.W.Data), length(elements.W.Data) @@ -420,34 +428,35 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){ ##' @title MCMC_function ##' @author Michael Dietze \email{dietze@@bu.edu}, Ann Raiho, Hamze Dokoohaki, and Dongchen Zhang. -##' @param data list containing everything needed for the MCMC sampling. +##' @param dat list containing everything needed for the MCMC sampling. ##' @details This function replace the previous code where implenmented the MCMC sampling part, which allows the MCMC sampling of multiple chains under parallel mode. -MCMC_function <- function(data){ - dimensions.tobit <- list(X = length(data$elements.W.Data), - X.mod = ncol(data$X), - Q = c(nrow(data$aqq), ncol(data$aqq)) +MCMC_function <- function(dat){ + t = dat$t + dimensions.tobit <- list(X = length(dat$elements.W.Data), + X.mod = ncol(dat$X), + Q = c(nrow(dat$aqq), ncol(dat$aqq)) ) # Contants defined in the model constants.tobit <- list( - N = ncol(data$X), - YN = length(data$elements.W.Data), - nH = length(data$elements.W.Data), - H = data$elements.W.Data, - NotH = which(!(1:ncol(data$X) %in% data$elements.W.Data)), - nNotH = which(!(1:ncol(data$X) %in% data$elements.W.Data)) %>% length(), - q.type=data$q.type + N = ncol(dat$X), + YN = length(dat$elements.W.Data), + nH = length(dat$elements.W.Data), + H = dat$elements.W.Data, + NotH = which(!(1:ncol(dat$X) %in% dat$elements.W.Data)), + nNotH = which(!(1:ncol(dat$X) %in% dat$elements.W.Data)) %>% length(), + q.type=dat$q.type ) # Data used for setting the likelihood and other stuff data.tobit <- list( - muf = as.vector(data$mu.f), - pf = data$Pf, - aq = data$aqq[,,t], - bq = data$bqq[t], - y.ind = data$y.ind, - y.censored = data$y.censored, - r = solve(data$R) + muf = as.vector(dat$mu.f), + pf = dat$Pf, + aq = dat$aqq[,,t], + bq = dat$bqq[t], + y.ind = dat$y.ind, + y.censored = dat$y.censored, + r = solve(dat$R) ) if(constants.tobit$YN == 1){ #add error message if trying to run SDA with 1 obs and 1 state variable no model currently exists to handle this case, need to remove for loop from GEF_singleobs_nimble for this case and save new model @@ -462,11 +471,11 @@ MCMC_function <- function(data){ constants.tobit$q.type <- NULL inits.pred <- list( - X.mod = as.vector(data$mu.f), - X = as.vector(data$mu.f)[data$elements.W.Data], - Xall = as.vector(data$mu.f), - Xs = as.vector(data$mu.f)[data$elements.W.Data], - q = diag(1, length(data$elements.W.Data), length(data$elements.W.Data)) + X.mod = as.vector(dat$mu.f), + X = as.vector(dat$mu.f)[dat$elements.W.Data], + Xall = as.vector(dat$mu.f), + Xs = as.vector(dat$mu.f)[dat$elements.W.Data], + q = diag(1, length(dat$elements.W.Data), length(dat$elements.W.Data)) ) model_pred <- nimble::nimbleModel(GEF_singleobs_nimble, data = data.tobit, @@ -483,16 +492,16 @@ MCMC_function <- function(data){ } ## Adding X.mod,q,r as data for building model. conf <- nimble::configureMCMC(model_pred, print=TRUE) - conf$setMonitors(data$monitors) + conf$setMonitors(dat$monitors) samplerNumberOffset <- length(conf$getSamplers()) - for(i in 1:length(data$y.ind)) { + for(i in 1:length(dat$y.ind)) { node <- paste0('y.censored[',i,']') conf$addSampler(node, 'toggle', control=list(type='RW')) } #handling samplers samplerLists <- conf$getSamplers() - samplerLists[[2]]$control <- list(propCov= data$Pf, adaptScaleOnly = TRUE, adaptive = TRUE) + samplerLists[[2]]$control <- list(propCov= dat$Pf, adaptScaleOnly = TRUE, adaptive = TRUE) conf$setSamplers(samplerLists) conf$printSamplers() @@ -500,22 +509,22 @@ MCMC_function <- function(data){ Cmodel <- nimble::compileNimble(model_pred) Cmcmc <- nimble::compileNimble(Rmcmc, project = model_pred, showCompilerOutput = TRUE) - for(i in 1:length(data$y.ind)) { - valueInCompiledNimbleFunction(Cmcmc$samplerFunctions[[samplerNumberOffset+i]], 'toggle', 1-data$y.ind[i]) + for(i in 1:length(dat$y.ind)) { + valueInCompiledNimbleFunction(Cmcmc$samplerFunctions[[samplerNumberOffset+i]], 'toggle', 1-dat$y.ind[i]) } inits <- function(){ - ind <- sample(seq_along(1:nrow(data$X)), 1) - init_muf <- data$X[ind,] + ind <- sample(seq_along(1:nrow(dat$X)), 1) + init_muf <- dat$X[ind,] list(X.mod = as.vector(init_muf), - X = as.vector(init_muf)[data$elements.W.Data], + X = as.vector(init_muf)[dat$elements.W.Data], Xall = as.vector(init_muf), - Xs = as.vector(init_muf)[data$elements.W.Data], - q = diag(1, length(data$elements.W.Data), length(data$elements.W.Data))) + Xs = as.vector(init_muf)[dat$elements.W.Data], + q = diag(1, length(dat$elements.W.Data), length(dat$elements.W.Data))) } if(exists("inits.pred")){ - dat <- runMCMC(Cmcmc, niter = data$nitr.GEF, nburnin = data$nburnin, thin = data$nthin, nchains = 1) + dat <- runMCMC(Cmcmc, niter = dat$nitr.GEF, nburnin = dat$nburnin, thin = dat$nthin, nchains = 1) }else{ - dat <- runMCMC(Cmcmc, niter = data$nitr.GEF, nburnin = data$nburnin, thin = data$nthin, nchains = 1, inits = inits) + dat <- runMCMC(Cmcmc, niter = dat$nitr.GEF, nburnin = dat$nburnin, thin = dat$nthin, nchains = 1, inits = inits) } return(dat) -} \ No newline at end of file +} diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index a1614aa93af..215b799fc37 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -82,12 +82,7 @@ sda.enkf.multisite <- function(settings, forecast.time.step <- settings$state.data.assimilation$forecast.time.step #idea for later generalizing nens <- as.numeric(settings$ensemble$size) - processvar <- settings$state.data.assimilation$process.variance - if(processvar=="TRUE"){ - processvar <- TRUE - }else{ - processvar <- FALSE - } + processvar <- as.logical(settings$state.data.assimilation$process.variance) Localization.FUN <- settings$state.data.assimilation$Localization.FUN # localization function scalef <- settings$state.data.assimilation$scalef %>% as.numeric() # scale factor for localization var.names <- sapply(settings$state.data.assimilation$state.variable, '[[', "variable.name") @@ -113,6 +108,7 @@ sda.enkf.multisite <- function(settings, TRUE, settings$state.data.assimilation$censored.data %>% as.logical) + if(is.null(settings$state.data.assimilation$chains)) settings$state.data.assimilation$chains = 3 #--------Initialization FORECAST <- ANALYSIS <- ens_weights <- list() enkf.params <- list() @@ -132,6 +128,11 @@ sda.enkf.multisite <- function(settings, if(multi.site.flag){ conf.settings<-settings site.ids <- conf.settings$run %>% purrr::map('site') %>% purrr::map('id') %>% base::unlist() %>% base::as.character() + if(length(site.ids)==0){ ### these hacks should not be nessisary + for(s in seq_along(conf.settings$run)){ + site.ids[s] = conf.settings[[s]]$run[[s]]$site$id + } + } # a matrix ready to be sent to spDistsN1 in sp package - first col is the long second is the lat and row names are the site ids site.locs <- conf.settings$run %>% purrr::map('site') %>% purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>% t %>% diff --git a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R index a0af1b23f4b..8395b60af95 100644 --- a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R +++ b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R @@ -3,7 +3,7 @@ ########### CONFIGURATION ################### ## original start date was 2022-05-17 -runDays <- seq(as.Date("2023-05-30"), as.Date("2023-06-13"), by="days") +runDays <- seq(as.Date("2023-07-06"), as.Date("2023-07-08"), by="days") FORCE = FALSE ## should we overwrite previously completed runs ## forecast configuration @@ -93,7 +93,7 @@ minio_uri_public <- function(...) { sprintf(template, minio_path(...), minio_host, ":", minio_port) } -runDays <- seq(as.Date("2022-11-25"), as.Date("2023-06-13"), by="days") +runDays <- seq(as.Date("2023-06-22"), as.Date("2023-07-10"), by="days") ## loop over dates FORCE = FALSE @@ -146,4 +146,4 @@ for (s in seq_along(runDays)) { ## list set of uploaded dates submitted = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% dplyr::distinct(reference_datetime) %>% dplyr::collect() %>% as.vector() - +sort(submitted$reference_datetime) diff --git a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R index 34eaceb7976..3cc2c17a6b5 100644 --- a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R +++ b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R @@ -1,4 +1,6 @@ +#!/usr/bin/env Rscript ## Forecast helper script around 05_SDA_Workflow_NA +.libPaths(c("/projectnb/dietzelab/dietze/test-pecan/R/library",.libPaths())) ## forecast configuration projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/" ## main folder diff --git a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R index 0a4e183e14a..d54acb9e19b 100755 --- a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R +++ b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R @@ -4,7 +4,9 @@ # ---------------------------------------------------------------------- #------------------------------------------ Load required libraries----- # ---------------------------------------------------------------------- +.libPaths(c("/projectnb/dietzelab/dietze/test-pecan/R/library",.libPaths())) library("PEcAn.all") +library("PEcAn.settings") library("PEcAn.utils") library("PEcAn.data.remote") library("PEcAnAssimSequential") @@ -58,7 +60,7 @@ projectdir = set$outdir # -------------------------------------------------------------------------------------------------- #initialize obs.mean/cov NAs -site.ids <- papply(set,function(x)(x$run$site$id)) %>% unlist() %>% as.character() +site.ids <- PEcAn.settings::papply(set,function(x)(x$run$site$id)) %>% unlist() %>% as.character() nsite = length(site.ids) NAdata = data.frame(date = c(rep(start.date,nsite),rep(sda.start,nsite)), @@ -130,16 +132,16 @@ set$pfts$pft$outdir = file.path(set$outdir,"pft") set$host$rundir <- set$rundir set$host$outdir <- set$modeloutdir set$host$folder <- set$modeloutdir -dir.create(set$outdir) -dir.create(set$rundir) -dir.create(set$modeloutdir) -dir.create(set$pfts$pft$outdir) +dir.create(set$outdir,showWarnings = FALSE) +dir.create(set$rundir,showWarnings = FALSE) +dir.create(set$modeloutdir,showWarnings = FALSE) +dir.create(set$pfts$pft$outdir,showWarnings = FALSE) #manually add in clim files path = "/projectnb/dietzelab/ahelgeso/NOAA_met_data_CH1/noaa_clim/HARV/" ## hack met_paths <- list.files(path = file.path(path, start.date), full.names = TRUE, pattern = ".clim") #met_paths <- list.files(path = file.path(settings$run$settings.1$inputs$met$path, start.date), full.names = TRUE, pattern = ".clim") -if(is_empty(met_paths)){ +if(purrr::is_empty(met_paths)){ print(paste("SKIPPING: NO MET FOR",start.date)) cat(as.character(start.date),sep="\n",file=file.path(dirname(set$outdir),"NO_MET"),append=TRUE) ## add to list of dates missing met stop_quietly() @@ -159,7 +161,7 @@ rownames(run_id) <- NULL run_id = as.data.frame(run_id) %>% mutate(folder=prev_run_ids,id = paste0("id",.data$ens)) %>% group_by(site) ###settings$runs$id = run_id for(s in seq_along(set)){ - site_run_id = run_id %>% filter(site == set[[s]]$run$site$id) + site_run_id = run_id |> filter(site == as.list(set$run[[s]]$site$id)[[1]]) set[[s]]$run$id = as.list(site_run_id$folder) names(set[[s]]$run$id) = site_run_id$id } @@ -177,6 +179,8 @@ sda.enkf.multisite(settings = set, restart = restart, forceRun = TRUE, keepNC = TRUE, + run_parallel = FALSE, + parallel_qsub = FALSE, control = list(trace = TRUE, FF = FALSE, interactivePlot = FALSE, @@ -187,7 +191,8 @@ sda.enkf.multisite(settings = set, debug = FALSE, pause = FALSE, Profiling = FALSE, - OutlierDetection=FALSE)) + OutlierDetection=FALSE, + free_run = FALSE)) ## seems to be defined twice diff --git a/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd b/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd index 62ee2efec99..7543c28444b 100644 --- a/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd +++ b/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd @@ -10,18 +10,41 @@ library(cronR) ## Hello World ```{r} - +cmd <- cronR::cron_rscript("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/HelloWorld.R") +cron_add(command = cmd, frequency = '*/5 * * * *', id = 'HelloWorld', description = "Every 5 min") +cron_njobs() +cron_ls() +## wait for cron to run successfully then remove from queue +cron_rm("HelloWorld") ``` ## Met forecast (taking over for Alexis) ```{r} +cmd <- cronR::cron_rscript("/home/dietze/pecan/scripts/HARV_metdownload_efi.R") +cron_add(command = cmd, frequency = '0 */6 * * *', id = 'NOAA_GEFS', description = "NOAA GEFS forecasts via EFI server; update every 6 hr") +cron_njobs() +cron_ls() ``` +## manual catchup on met +```{r} +runDays <- seq(as.Date("2023-06-24"), as.Date("2023-07-09"), by="days") +for (s in seq_along(runDays)) { + msg = system2(file.path(pecanhome,"scripts/HARV_metdownload_efi.R"), + paste("--start.date",runDays[s]), + wait=TRUE)#, + # stdout="stdout.log", + # stderr="stderr.log") + print(msg) +} +``` + + ```{r} ### NO ASSIMILATION FORECAST -cmd <- cronR::cron_rscript("~/pecan/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R") +cmd <- cronR::cron_rscript("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R") cron_add(command = cmd, frequency = 'daily', at='4AM', id = 'HARV_FNA', description = "Harvard Forest landscape forecast, No Assimilated data") cron_njobs() cron_ls() @@ -30,7 +53,7 @@ cron_ls() ## SMAP DATA DOWNLOAD ```{r} -cmd <- cronR::cron_rscript("~/pecan/modules/assim.sequential/inst/hf_landscape/07_SMAP.R") +cmd <- cronR::cron_rscript("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/07_SMAP.R") cronR::cron_add(command = cmd, frequency = 'daily', at='11PM', id = 'SMAP', description = "SMAP download") cronR::cron_njobs() cronR::cron_ls() diff --git a/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R b/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R index 304e2ecd245..4bace0a4de4 100644 --- a/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R +++ b/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R @@ -85,9 +85,10 @@ PEcAn2EFI.ens <- function(outdir,run.id,start_date,end_date = NULL){ PEcAn.logger::logger.info("reading ensemble output from run id: ", format(run.id, scientific = FALSE)) ensemble.output <- PEcAn.utils::read.output(run.id, file.path(outdir, run.id), start.year, end.year, variables=NULL) - if(!is.numeric(nrow(ensemble.output))) return(NULL) - + if(length(ensemble.output)==0) return(NULL) + ## reprocess time_bounds + if(length(dim(ensemble.output$time_bounds)<2)) ensemble.output$time_bounds = matrix(ensemble.output$time_bounds,nrow=2) doy = as.vector(ensemble.output$time_bounds[1,]) ## day of year years = c(which(doy < 0.00001),length(doy)+1) ## get breaks between years if(years[1] != 1) years = c(1,years) ## add current year if not start of year diff --git a/modules/assim.sequential/man/MCMC_function.Rd b/modules/assim.sequential/man/MCMC_function.Rd index 7f32ab3faf6..bce56e3ae5e 100644 --- a/modules/assim.sequential/man/MCMC_function.Rd +++ b/modules/assim.sequential/man/MCMC_function.Rd @@ -4,10 +4,10 @@ \alias{MCMC_function} \title{MCMC_function} \usage{ -MCMC_function(data) +MCMC_function(dat) } \arguments{ -\item{data}{list containing everything needed for the MCMC sampling.} +\item{dat}{list containing everything needed for the MCMC sampling.} } \description{ MCMC_function diff --git a/modules/data.atmosphere/NAMESPACE b/modules/data.atmosphere/NAMESPACE index 7183782887f..39e2937891b 100644 --- a/modules/data.atmosphere/NAMESPACE +++ b/modules/data.atmosphere/NAMESPACE @@ -36,6 +36,7 @@ export(download.NARR_site) export(download.NEONmet) export(download.NLDAS) export(download.NOAA_GEFS) +export(download.NOAA_GEFS_EFI) export(download.PalEON) export(download.PalEON_ENS) export(download.US_WCr) diff --git a/modules/data.atmosphere/R/download_noaa_gefs_efi.R b/modules/data.atmosphere/R/download_noaa_gefs_efi.R index 0483b8c3331..c01c98007c8 100644 --- a/modules/data.atmosphere/R/download_noaa_gefs_efi.R +++ b/modules/data.atmosphere/R/download_noaa_gefs_efi.R @@ -1,4 +1,4 @@ -#' download_NOAA_GEFS_EFI +#' download.NOAA_GEFS_EFI #' #' @param start_date start date for met forecast #' @param sitename NEON site name @@ -10,8 +10,10 @@ #' #' #' @author Alexis Helgeson +#' @author Mike Dietze +#' @export #' -download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, site.lon){ +download.NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, site.lon){ #using the stage2 fcn mean that the met as already been downscaled and gapfilled to 1 hr intervals met = PEcAn.data.atmosphere::noaa_stage2(cycle = 0, version = "v12", @@ -20,9 +22,9 @@ download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, si start_date = start_date) weather = met %>% - dplyr::filter(.data$reference_datetime == as.POSIXct(start_date,tz="UTC"), sitename == sitename) %>% + dplyr::filter(.data$reference_datetime == as.POSIXct(start_date,tz="UTC"), site_id == sitename) %>% dplyr::collect() %>% - dplyr::select(.data$sitename, .data$prediction, .data$variable, .data$horizon, .data$parameter, .data$datetime) + dplyr::select(.data$site_id, .data$prediction, .data$variable, .data$horizon, .data$parameter, .data$datetime) PEcAn.logger::logger.info("Met Aquired for", sitename, "on", as.character(start_date)) #grab/calculate timestep, this might not be necessary b/c of the datetime column? @@ -55,7 +57,7 @@ download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, si #adding in windspeed and specific humidity cf_var_names1 <- c("surface_downwelling_longwave_flux_in_air", "surface_downwelling_shortwave_flux_in_air", "precipitation_flux", "air_pressure", "relative_humidity", "air_temperature", "specific_humidity", "wind_speed") cf_var_units1 <- c("Wm-2", "Wm-2", "kgm-2s-1", "Pa", "1", "K", "1", "ms-1") - #calculate specific humdity using realtive humidity (no unit conversion requied as relative humidity is in range 0-1), air temperature (no unit conversion already in K), and air pressure (no unit conversion already in Pa) + #calculate specific humdity using relative humidity (no unit conversion requied as relative humidity is in range 0-1), air temperature (no unit conversion already in K), and air pressure (no unit conversion already in Pa) specific_humidity <- rep(NA, length(noaa_data$relative_humidity$value)) specific_humidity[which(!is.na(noaa_data$relative_humidity$value))] <- PEcAn.data.atmosphere::rh2qair(rh = noaa_data$relative_humidity$value[which(!is.na(noaa_data$relative_humidity$value))], T = noaa_data$air_temperature$value[which(!is.na(noaa_data$relative_humidity$value))], diff --git a/modules/data.atmosphere/man/download_NOAA_GEFS_EFI.Rd b/modules/data.atmosphere/man/download.NOAA_GEFS_EFI.Rd similarity index 70% rename from modules/data.atmosphere/man/download_NOAA_GEFS_EFI.Rd rename to modules/data.atmosphere/man/download.NOAA_GEFS_EFI.Rd index e6f23a6699d..eed1d3b757d 100644 --- a/modules/data.atmosphere/man/download_NOAA_GEFS_EFI.Rd +++ b/modules/data.atmosphere/man/download.NOAA_GEFS_EFI.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/download_noaa_gefs_efi.R -\name{download_NOAA_GEFS_EFI} -\alias{download_NOAA_GEFS_EFI} -\title{download_NOAA_GEFS_EFI} +\name{download.NOAA_GEFS_EFI} +\alias{download.NOAA_GEFS_EFI} +\title{download.NOAA_GEFS_EFI} \usage{ -download_NOAA_GEFS_EFI(sitename, outfolder, start_date, site.lat, site.lon) +download.NOAA_GEFS_EFI(sitename, outfolder, start_date, site.lat, site.lon) } \arguments{ \item{sitename}{NEON site name} @@ -21,8 +21,10 @@ download_NOAA_GEFS_EFI(sitename, outfolder, start_date, site.lat, site.lon) message confirming download complete and location of .nc files } \description{ -download_NOAA_GEFS_EFI +download.NOAA_GEFS_EFI } \author{ Alexis Helgeson + +Mike Dietze } diff --git a/scripts/HARV_metdownload_efi.R b/scripts/HARV_metdownload_efi.R old mode 100644 new mode 100755 index abab73d3a39..54684ae6d09 --- a/scripts/HARV_metdownload_efi.R +++ b/scripts/HARV_metdownload_efi.R @@ -1,44 +1,64 @@ +#!/usr/bin/env Rscript #load libraries +.libPaths(c("/projectnb/dietzelab/dietze/test-pecan/R/library",.libPaths())) library(dplyr) -#load fcns -source("/projectnb/dietzelab/ahelgeso/Forecast_Scripts/download_noaa_gefs_efi.R") -source("/projectnb/dietzelab/ahelgeso/Forecast_Scripts/noaa_gefs_efi_helper.R") +option_list = list(optparse::make_option("--start.date", + default = Sys.Date(), + type="character"), + optparse::make_option("--jumpback", + default = 10, + type="integer") +) +args <- optparse::parse_args(optparse::OptionParser(option_list = option_list)) +if(is.null(args$jumpback)) args$jumpback = 10 +jumpback = args$jumpback + #set fcn inputs -startdate = format(Sys.Date(), "%Y-%m-%d") +startdate = as.Date(args$start.date) nc_dir = "/projectnb/dietzelab/ahelgeso/NOAA_met_data/" site.lat = 42.5 site.lon = -72.15 sitename <- "HARV" siteid <- 646 clim_dir <- "/projectnb/dietzelab/ahelgeso/NOAA_met_data_CH1/" +runDays <- format(seq(from=as.Date(startdate - lubridate::days(jumpback)), + to=as.Date(as.character(startdate)), + by="days"),"%Y-%m-%d") +print("args set") -#download met using EFI fcns -download_NOAA_GEFS_EFI(sitename = sitename, - outfolder = nc_dir, - start_date = startdate, - site.lat = site.lat, - site.lon = site.lon) -#set up path for met2model -output_path <- file.path("/projectnb/dietzelab/ahelgeso/NOAA_met_data/noaa/NOAAGEFS_1hr", sitename, startdate, "00") -########## Met2Model For SIPNET ############## -outfolder = file.path(clim_dir, "noaa_clim", sitename, startdate) -if(!dir.exists(outfolder)){dir.create(outfolder, recursive = TRUE)} - -in.path = output_path -in.prefix = list.files(output_path) - -end_date = as.Date(startdate) + lubridate::days(35) - -for(l in 1:length(in.prefix)){ +for(t in seq_along(runDays)){ + print(runDays[t]) + + #download met using EFI fcns + tmp = PEcAn.data.atmosphere:::download.NOAA_GEFS_EFI(sitename = sitename, + outfolder = nc_dir, + start_date = runDays[t], + site.lat = site.lat, + site.lon = site.lon) + #set up path for met2model + output_path <- file.path("/projectnb/dietzelab/ahelgeso/NOAA_met_data/noaa/NOAAGEFS_1hr", sitename, runDays[t], "00") + ########## Met2Model For SIPNET ############## + outfolder = file.path(clim_dir, "noaa_clim", sitename, runDays[t]) + if(!dir.exists(outfolder)){dir.create(outfolder, recursive = TRUE)} + + in.path = output_path + in.prefix = list.files(output_path) + + end_date = as.Date(runDays[t]) + lubridate::days(35) - PEcAn.SIPNET::met2model.SIPNET(in.path = in.path, - in.prefix = in.prefix[l], - outfolder = outfolder, - start_date = startdate, - end_date = end_date, - overwrite = FALSE, - verbose = FALSE, - year.fragment = TRUE) + for(l in 1:length(in.prefix)){ + + PEcAn.SIPNET::met2model.SIPNET(in.path = in.path, + in.prefix = in.prefix[l], + outfolder = outfolder, + start_date = runDays[t], + end_date = end_date, + overwrite = FALSE, + verbose = FALSE, + year.fragment = TRUE) + + } -} +} +print("done") From 5512ae7f4061a792c277eaa5167eaeac667339c4 Mon Sep 17 00:00:00 2001 From: Mike Dietze Date: Mon, 10 Jul 2023 15:27:16 -0400 Subject: [PATCH 3/7] fix run.write.configs to allow reuse of previous ensemble samples. Added so that parameters are not resampled site-by-site in a multi-site run. --- base/workflow/R/run.write.configs.R | 108 ++++++++++-------- base/workflow/R/runModule.run.write.configs.R | 12 +- base/workflow/man/run.write.configs.Rd | 5 +- .../man/runModule.run.write.configs.Rd | 8 +- 4 files changed, 80 insertions(+), 53 deletions(-) diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R index 70ed76f7f51..c7329858d63 100644 --- a/base/workflow/R/run.write.configs.R +++ b/base/workflow/R/run.write.configs.R @@ -11,6 +11,7 @@ #' @param posterior.files Filenames for posteriors for drawing samples for ensemble and sensitivity #' analysis (e.g. post.distns.Rdata, or prior.distns.Rdata) #' @param overwrite logical: Replace output files that already exist? +#' @param use.existing.samples FALSE (default), TRUE, or filename #' #' @details The default value for \code{posterior.files} is NA, in which case the #' most recent posterior or prior (in that order) for the workflow is used. @@ -24,7 +25,7 @@ #' @author David LeBauer, Shawn Serbin, Ryan Kelly, Mike Dietze run.write.configs <- function(settings, write = TRUE, ens.sample.method = "uniform", posterior.files = rep(NA, length(settings$pfts)), - overwrite = TRUE) { + overwrite = TRUE,use.existing.samples=FALSE) { tryCatch({ con <- PEcAn.DB::db.open(settings$database$bety) on.exit(PEcAn.DB::db.close(con), add = TRUE) @@ -34,57 +35,68 @@ run.write.configs <- function(settings, write = TRUE, ens.sample.method = "unifo conditionMessage(e)) }) - ## Which posterior to use? - for (i in seq_along(settings$pfts)) { - ## if posterior.files is specified us that - if (is.na(posterior.files[i])) { - ## otherwise, check to see if posteriorid exists - if (!is.null(settings$pfts[[i]]$posteriorid)) { - #TODO: sometimes `files` is a 0x0 tibble and other operations with it fail. - files <- PEcAn.DB::dbfile.check("Posterior", - settings$pfts[[i]]$posteriorid, - con, settings$host$name, return.all = TRUE) - pid <- grep("post.distns.*Rdata", files$file_name) ## is there a posterior file? - if (length(pid) == 0) { - pid <- grep("prior.distns.Rdata", files$file_name) ## is there a prior file? - } - if (length(pid) > 0) { - posterior.files[i] <- file.path(files$file_path[pid], files$file_name[pid]) - } ## otherwise leave posteriors as NA - } - ## otherwise leave NA and get.parameter.samples will look for local - } else { - ## does posterior.files point to a directory instead of a file? - if(utils::file_test("-d",posterior.files[i])){ - pfiles = dir(posterior.files[i],pattern = "post.distns.*Rdata",full.names = TRUE) - if(length(pfiles)>1){ - pid = grep("post.distns.Rdata",pfiles) - if(length(pid > 0)){ - pfiles = pfiles[grep("post.distns.Rdata",pfiles)] - } else { - PEcAn.logger::logger.error( - "run.write.configs: could uniquely identify posterior files within", - posterior.files[i]) - } - posterior.files[i] = pfiles - } - } - ## also, double check PFT outdir exists - if (is.null(settings$pfts[[i]]$outdir) || is.na(settings$pfts[[i]]$outdir)) { - ## no outdir - settings$pfts[[i]]$outdir <- file.path(settings$outdir, "pfts", settings$pfts[[i]]$name) - } - } ## end else - } ## end for loop over pfts - - ## Sample parameters model <- settings$model$type scipen <- getOption("scipen") options(scipen = 12) - - PEcAn.uncertainty::get.parameter.samples(settings, posterior.files, ens.sample.method) - load(file.path(settings$outdir, "samples.Rdata")) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples + ## Which posterior to use? + if(is.logical(use.existing.samples)){ + if(use.existing.samples & file.exists(file.path(settings$outdir, "samples.Rdata"))){ + load(file.path(settings$outdir, "samples.Rdata")) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples + } else { + for (i in seq_along(settings$pfts)) { + ## if posterior.files is specified us that + if (is.na(posterior.files[i])) { + ## otherwise, check to see if posteriorid exists + if (!is.null(settings$pfts[[i]]$posteriorid)) { + #TODO: sometimes `files` is a 0x0 tibble and other operations with it fail. + files <- PEcAn.DB::dbfile.check("Posterior", + settings$pfts[[i]]$posteriorid, + con, settings$host$name, return.all = TRUE) + pid <- grep("post.distns.*Rdata", files$file_name) ## is there a posterior file? + if (length(pid) == 0) { + pid <- grep("prior.distns.Rdata", files$file_name) ## is there a prior file? + } + if (length(pid) > 0) { + posterior.files[i] <- file.path(files$file_path[pid], files$file_name[pid]) + } ## otherwise leave posteriors as NA + } + ## otherwise leave NA and get.parameter.samples will look for local + } else { + ## does posterior.files point to a directory instead of a file? + if(utils::file_test("-d",posterior.files[i])){ + pfiles = dir(posterior.files[i],pattern = "post.distns.*Rdata",full.names = TRUE) + if(length(pfiles)>1){ + pid = grep("post.distns.Rdata",pfiles) + if(length(pid > 0)){ + pfiles = pfiles[grep("post.distns.Rdata",pfiles)] + } else { + PEcAn.logger::logger.error( + "run.write.configs: could uniquely identify posterior files within", + posterior.files[i]) + } + posterior.files[i] = pfiles + } + } + ## also, double check PFT outdir exists + if (is.null(settings$pfts[[i]]$outdir) || is.na(settings$pfts[[i]]$outdir)) { + ## no outdir + settings$pfts[[i]]$outdir <- file.path(settings$outdir, "pfts", settings$pfts[[i]]$name) + } + } ## end else + } ## end for loop over pfts + + ## Sample parameters + PEcAn.uncertainty::get.parameter.samples(settings, posterior.files, ens.sample.method) + load(file.path(settings$outdir, "samples.Rdata")) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples + + ## end use.existing.samples == FALSE + } ## is.logical(use.existing.samples) + } else { + ### use.existing.samples must be a filename + load(use.existing.samples) + } + ## remove previous runs.txt if (overwrite && file.exists(file.path(settings$rundir, "runs.txt"))) { PEcAn.logger::logger.warn("Existing runs.txt file will be removed.") diff --git a/base/workflow/R/runModule.run.write.configs.R b/base/workflow/R/runModule.run.write.configs.R index 265da4f6c1c..1bddef506f1 100644 --- a/base/workflow/R/runModule.run.write.configs.R +++ b/base/workflow/R/runModule.run.write.configs.R @@ -2,16 +2,17 @@ #' #' @param settings a PEcAn Settings or MultiSettings object #' @param overwrite logical: Replace config files if they already exist? +#' @param use.existing.samples FALSE (default), TRUE, or filename #' @return A modified settings object, invisibly #' @export -runModule.run.write.configs <- function(settings, overwrite = TRUE) { +runModule.run.write.configs <- function(settings, overwrite = TRUE, use.existing.samples = TRUE) { if (PEcAn.settings::is.MultiSettings(settings)) { if (overwrite && file.exists(file.path(settings$rundir, "runs.txt"))) { PEcAn.logger::logger.warn("Existing runs.txt file will be removed.") unlink(file.path(settings$rundir, "runs.txt")) } - return(PEcAn.settings::papply(settings, runModule.run.write.configs, overwrite = FALSE)) + return(PEcAn.settings::papply(settings, runModule.run.write.configs, overwrite = overwrite,use.existing.samples=use.existing.samples)) } else if (PEcAn.settings::is.Settings(settings)) { write <- settings$database$bety$write # double check making sure we have method for parameter sampling @@ -27,7 +28,12 @@ runModule.run.write.configs <- function(settings, overwrite = TRUE) { }) %>% unlist() - return(PEcAn.workflow::run.write.configs(settings, write, ens.sample.method, posterior.files = posterior.files, overwrite = overwrite)) + return(PEcAn.workflow::run.write.configs(settings, + write, + ens.sample.method, + posterior.files = posterior.files, + overwrite = overwrite, + use.existing.samples = use.existing.samples)) } else { stop("runModule.run.write.configs only works with Settings or MultiSettings") } diff --git a/base/workflow/man/run.write.configs.Rd b/base/workflow/man/run.write.configs.Rd index b5be69a3287..0f88633d886 100644 --- a/base/workflow/man/run.write.configs.Rd +++ b/base/workflow/man/run.write.configs.Rd @@ -9,7 +9,8 @@ run.write.configs( write = TRUE, ens.sample.method = "uniform", posterior.files = rep(NA, length(settings$pfts)), - overwrite = TRUE + overwrite = TRUE, + use.existing.samples = FALSE ) } \arguments{ @@ -23,6 +24,8 @@ run.write.configs( analysis (e.g. post.distns.Rdata, or prior.distns.Rdata)} \item{overwrite}{logical: Replace output files that already exist?} + +\item{use.existing.samples}{FALSE (default), TRUE, or filename} } \value{ an updated settings list, which includes ensemble IDs for SA and ensemble analysis diff --git a/base/workflow/man/runModule.run.write.configs.Rd b/base/workflow/man/runModule.run.write.configs.Rd index fcb253947b3..859839b40b7 100644 --- a/base/workflow/man/runModule.run.write.configs.Rd +++ b/base/workflow/man/runModule.run.write.configs.Rd @@ -4,12 +4,18 @@ \alias{runModule.run.write.configs} \title{Generate model-specific run configuration files for one or more PEcAn runs} \usage{ -runModule.run.write.configs(settings, overwrite = TRUE) +runModule.run.write.configs( + settings, + overwrite = TRUE, + use.existing.samples = TRUE +) } \arguments{ \item{settings}{a PEcAn Settings or MultiSettings object} \item{overwrite}{logical: Replace config files if they already exist?} + +\item{use.existing.samples}{FALSE (default), TRUE, or filename} } \value{ A modified settings object, invisibly From c5180ceda737824ac27505a8c76bf068c497dae3 Mon Sep 17 00:00:00 2001 From: Mike Dietze Date: Fri, 14 Jul 2023 10:13:50 -0400 Subject: [PATCH 4/7] updates. trying to deal with keeping ensemble met together and get the 35 day forecast working. --- models/sipnet/R/model2netcdf.SIPNET.R | 4 +- models/sipnet/man/model2netcdf.SIPNET.Rd | 2 +- modules/assim.sequential/R/metSplit.R | 43 ++++++++- .../assim.sequential/R/sda.enkf_MultiSite.R | 39 ++++---- .../hf_landscape/04_ForwardOnlyForecast.R | 49 +++++++--- .../05B_SDA_Workflow_NA.reanalysis.R | 90 ++++++++++++++++--- .../05C_SDA_Workflow_NA.forecast.R | 18 ++-- .../inst/hf_landscape/05_SDA_Workflow_NA.R | 32 ++++--- .../inst/hf_landscape/07_SMAP.R | 4 +- modules/assim.sequential/man/metSplit.Rd | 5 +- 10 files changed, 215 insertions(+), 71 deletions(-) diff --git a/models/sipnet/R/model2netcdf.SIPNET.R b/models/sipnet/R/model2netcdf.SIPNET.R index 33ca6480dc7..f54e1d94944 100644 --- a/models/sipnet/R/model2netcdf.SIPNET.R +++ b/models/sipnet/R/model2netcdf.SIPNET.R @@ -103,7 +103,7 @@ sipnet2datetime <- function(sipnet_tval, base_year, base_month = 1, ##' @export ##' @author Shawn Serbin, Michael Dietze model2netcdf.SIPNET <- function(outdir, sitelat, sitelon, start_date, end_date, delete.raw = FALSE, revision, prefix = "sipnet.out", - overwrite = FALSE, conflict = FALSE) { + overwrite = FALSE, conflict = TRUE) { ### Read in model output in SIPNET format sipnet_out_file <- file.path(outdir, prefix) @@ -323,4 +323,4 @@ model2netcdf.SIPNET <- function(outdir, sitelat, sitelon, start_date, end_date, } } # model2netcdf.SIPNET #--------------------------------------------------------------------------------------------------# -### EOF \ No newline at end of file +### EOF diff --git a/models/sipnet/man/model2netcdf.SIPNET.Rd b/models/sipnet/man/model2netcdf.SIPNET.Rd index 3853c307e6a..697ec32d1bd 100644 --- a/models/sipnet/man/model2netcdf.SIPNET.Rd +++ b/models/sipnet/man/model2netcdf.SIPNET.Rd @@ -14,7 +14,7 @@ model2netcdf.SIPNET( revision, prefix = "sipnet.out", overwrite = FALSE, - conflict = FALSE + conflict = TRUE ) } \arguments{ diff --git a/modules/assim.sequential/R/metSplit.R b/modules/assim.sequential/R/metSplit.R index e48156c0ccc..0e692df6931 100644 --- a/modules/assim.sequential/R/metSplit.R +++ b/modules/assim.sequential/R/metSplit.R @@ -10,13 +10,14 @@ #' @param nens number of ensemble members, taken from settings object #' @param restart_flag TRUE/FALSE taken from restart arguement #' @param my.split_inputs generated by sda.enkf_MultiSite ex. split_inputs.SIPNET +#' @param outpath if specified, write output to a new directory. Default NULL writes back to the directory being read #' @export #' #' @return input.split object with split met filepaths #' #' @author Alexis Helgeson #' -metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, obs.times, t, nens, restart_flag = FALSE, my.split_inputs){ +metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, obs.times, t, nens, restart_flag = FALSE, my.split_inputs, outpath=NULL){ #set start and end date for splitting met start.time = obs.times[t - 1] #always start timestep before @@ -26,6 +27,17 @@ metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, o }else{ stop.time = obs.times[t] } + + inputs.orig = inputs + nens.orig = nens + if(!is.null(outpath)){ + ## build a temporary inputs object based on original inputs in settings + for(i in seq_along(inputs)){ + inputs[[i]]$samples = settings[[i]]$run$inputs$met$path + } + nens = length(settings[[i]]$run$inputs$met$path) + } + #-Splitting the input for the models that they don't care about the start and end time of simulations and they run as long as their met file. inputs.split <- furrr::future_pmap(list(conf.settings %>% `class<-`(c("list")), inputs, model), function(settings, inputs, model) { @@ -42,7 +54,9 @@ metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, o settings = settings, start.time = (lubridate::ymd_hms(start.time, truncated = 3) + lubridate::second(lubridate::hms("00:00:01"))), stop.time = lubridate::ymd_hms(stop.time, truncated = 3), - inputs = inputs$samples[[i]]) + inputs = inputs$samples[[i]], + outpath = file.path(outpath,settings$run$site$id) + ) ) } } else{ @@ -50,6 +64,29 @@ metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, o } inputs.split }) - + + ## match split original files back to resampled + if(!is.null(outpath)){ + + new.inputs = inputs.orig + for(i in seq_along(inputs)){ + new.inputs[[i]] = list() + parent.input = settings[[i]]$run$inputs$met$path ## set of parent met + for(e in seq_along(inputs.orig[[i]]$samples)){ + ## extract prefix from old split ensemble member + old = sub(".clim","",basename(inputs.orig[[i]]$samples[[e]]),fixed=TRUE) + old = strsplit(old,split=".",fixed=TRUE)[[1]][1] + ## match ensemble member + ens = grep(old,unlist(parent.input)) + ## swap + new.inputs[[i]]$samples[[e]] = inputs.split[[i]]$samples[[ens]] + } + names(new.inputs[[i]]$samples) = rep("path",nens.orig) + } + + inputs.split = new.inputs + + } ### end match + return(inputs.split) } diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 215b799fc37..a0f02a29fee 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -117,7 +117,7 @@ sda.enkf.multisite <- function(settings, if(!dir.exists(settings$outdir)) dir.create(settings$outdir, showWarnings = FALSE) ##### Creating matrices that describe the bounds of the state variables - ##### interval is remade everytime depending on the data at time t + ##### interval is remade every time depending on the data at time t ##### state.interval stays constant and converts new.analysis to be within the correct bounds interval <- NULL state.interval <- cbind(as.numeric(lapply(settings$state.data.assimilation$state.variables,'[[','min_value')), @@ -128,11 +128,6 @@ sda.enkf.multisite <- function(settings, if(multi.site.flag){ conf.settings<-settings site.ids <- conf.settings$run %>% purrr::map('site') %>% purrr::map('id') %>% base::unlist() %>% base::as.character() - if(length(site.ids)==0){ ### these hacks should not be nessisary - for(s in seq_along(conf.settings$run)){ - site.ids[s] = conf.settings[[s]]$run[[s]]$site$id - } - } # a matrix ready to be sent to spDistsN1 in sp package - first col is the long second is the lat and row names are the site ids site.locs <- conf.settings$run %>% purrr::map('site') %>% purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>% t %>% @@ -182,7 +177,7 @@ sda.enkf.multisite <- function(settings, } } obs.times <- obs.times.POSIX - read_restart_times <- c(lubridate::ymd_hms(start.cut, truncated = 3), obs.times) + read_restart_times <- c(lubridate::ymd_hms(start.cut, truncated = 3), obs.times) ## TODO: could cause an off-by-one error on iterative forecast if multiple obs times nt <- length(obs.times) #sets length of for loop for Forecast/Analysis if (nt==0) PEcAn.logger::logger.severe('There has to be at least one Obs.') @@ -217,33 +212,33 @@ sda.enkf.multisite <- function(settings, conf.settings <-conf.settings %>% `class<-`(c("list")) %>% #until here, it separates all the settings for all sites that listed in the xml file - furrr::future_map(function(settings) { - library(paste0("PEcAn.",settings$model$type), character.only = TRUE)#solved by including the model in the settings + furrr::future_map(function(my.settings) { + library(paste0("PEcAn.",my.settings$model$type), character.only = TRUE)#solved by including the model in the settings inputs.split <- list() if (!no_split) { - for (i in 1:length(settings$run$inputs$met$path)) { + for (i in 1:length(my.settings$run$inputs$met$path)) { #---------------- model specific split inputs ### model specific split inputs - settings$run$inputs$met$path[[i]] <- do.call( + my.settings$run$inputs$met$path[[i]] <- do.call( my.split_inputs, args = list( - settings = settings, - start.time = lubridate::ymd_hms(settings$run$site$met.start, truncated = 3), # This depends if we are restart or not - stop.time = lubridate::ymd_hms(settings$run$site$met.end, truncated = 3), - inputs = settings$run$inputs$met$path[[i]], - outpath = paste0(paste0(settings$outdir, "/Extracted_met/"), settings$run$site$id), - overwrite =F + settings = my.settings, + start.time = lubridate::ymd_hms(my.settings$run$site$met.start, truncated = 3), # This depends if we are restart or not + stop.time = lubridate::ymd_hms(my.settings$run$site$met.end, truncated = 3), + inputs = my.settings$run$inputs$met$path[[i]], + outpath = paste0(paste0(my.settings$outdir, "/Extracted_met/"), my.settings$run$site$id), + overwrite = FALSE ) ) # changing the start and end date which will be used for model2netcdf.model - settings$run$start.date <- lubridate::ymd_hms(settings$state.data.assimilation$start.date, truncated = 3) - settings$run$end.date <- lubridate::ymd_hms(settings$state.data.assimilation$end.date, truncated = 3) + my.settings$run$start.date <- lubridate::ymd_hms(settings$state.data.assimilation$start.date, truncated = 3) + my.settings$run$end.date <- lubridate::ymd_hms(settings$state.data.assimilation$end.date, truncated = 3) } } else{ inputs.split <- inputs } - settings + my.settings }) conf.settings<- PEcAn.settings::as.MultiSettings(conf.settings) ###-------------------------------------------------------------------### @@ -375,7 +370,9 @@ sda.enkf.multisite <- function(settings, if (t>1){ #for next time step split the met if model requires #-Splitting the input for the models that they don't care about the start and end time of simulations and they run as long as their met file. - inputs.split <- metSplit(conf.settings, inputs, settings, model, no_split = FALSE, obs.times, t, nens, restart_flag = FALSE, my.split_inputs) + inputs.split <- metSplit(conf.settings, inputs, settings, model, no_split = FALSE, + obs.times, t, nens, restart_flag = FALSE, my.split_inputs, + outpath = paste0(settings$outdir, "/Extracted_met/")) #---------------- setting up the restart argument for each site separately and keeping them in a list restart.list <- diff --git a/modules/assim.sequential/inst/hf_landscape/04_ForwardOnlyForecast.R b/modules/assim.sequential/inst/hf_landscape/04_ForwardOnlyForecast.R index 965d488d1bb..b9b63f05ce3 100644 --- a/modules/assim.sequential/inst/hf_landscape/04_ForwardOnlyForecast.R +++ b/modules/assim.sequential/inst/hf_landscape/04_ForwardOnlyForecast.R @@ -6,11 +6,35 @@ option_list = list(optparse::make_option("--start.date", default = Sys.Date(), type="character")) args <- optparse::parse_args(optparse::OptionParser(option_list = option_list)) -#args$start.date = "2022-05-17" +#args$start.date = "2021-09-25" start.date = lubridate::as_date(args$start.date) #### grab & update default settings #### -set = readRDS("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/pecan.RDS") +set = readRDS("/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pecan.RDS") +if(FALSE){ + ## one time updates + projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03" + set$outdir = projectdir + set$ensemble$size = 100 ## increase ensemble size + set$pfts[[1]]$posterior.files = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pfts/PFT_Files_new/temperate/post.distns.Rdata" + set$pfts[[2]]$posterior.files = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pfts/PFT_Files_new/conifer/post.distns.Rdata" + set$pfts[[3]]$posterior.files = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pfts/PFT_Files_new/grass/post.distns.Rdata" + set$pfts[[4]]$posterior.files = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pfts/trait.mcmc.pda.soil.HPDA_som_respiration_rate_correction.Rdata" + ## add soil pft to all sites + soil.pft = grep(pattern = "soil",x=unlist(sapply(set$pfts,function(x){x$name}))) + for(i in seq_along(set)){ + set[[i]]$run$site$site.pft[[2]] = set$pfts[[soil.pft]]$name + names(set[[i]]$run$site$site.pft)[2] = "pft.name" + } + ## initial conditions + for(i in seq_along(set)){ + set[[i]]$run$inputs$poolinitcond = list() + set[[i]]$run$inputs$poolinitcond$path = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/IC/IC_site_1-269",40+i,"_1.nc") + set[[i]]$run$inputs$pft.site = file.path(projectdir,"site_pft.csv") + } + saveRDS(set,file = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pecan.RDS") +} + ##start.date for(s in seq_along(set)){ set[[s]]$run$start.date = start.date @@ -48,27 +72,28 @@ dir.create(set$rundir) dir.create(set$modeloutdir) dir.create(set$pfts$pft$outdir) ## job.sh -set$model$jobtemplate = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/template.job" +set$model$jobtemplate = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/template.job" -set <- PEcAn.settings::prepare.settings(set, force = FALSE) +#set <- PEcAn.settings::prepare.settings(set, force = FALSE) + -## add soil pft to all sites -soil.pft = grep(pattern = "soil",x=unlist(sapply(set$pfts,function(x){x$name}))) -for(i in seq_along(set)){ - set$run[[i]]$site$site.pft[[2]] = set$pfts[[soil.pft]]$name - names(set$run[[i]]$site$site.pft)[2] = "pft.name" -} ## check PFTs pft.names = unlist(sapply(set$pfts,function(x){x$name})) set$pfts[[which(is.na(pft.names))]] = NULL ## run workflow -set <- PEcAn.workflow::runModule.run.write.configs(set) +set <- PEcAn.workflow::runModule.run.write.configs(set,use.existing.samples=TRUE) PEcAn.workflow::runModule_start_model_runs(set, stop.on.error = FALSE) ## future work ## * Integrate in Phyllis's changes to be able to save and reuse previous ensemble draws ## * restart from previous forecast -## not sure why need to delete last line from met to get things to run. \ No newline at end of file +## not sure why need to delete last line from met to get things to run. + +## manual hack to recover runs.txt +library(stringr) +runs = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/FOF2021-09-25/run/runs.txt" +runset = paste0("ENS-",str_pad(rep(1:100,times = 10), 5, pad = "0"),"-10000269",rep(41:50,each=100)) +write(runset,file=runs) diff --git a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R index 8395b60af95..b7a4a68c0ad 100644 --- a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R +++ b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R @@ -3,11 +3,12 @@ ########### CONFIGURATION ################### ## original start date was 2022-05-17 -runDays <- seq(as.Date("2023-07-06"), as.Date("2023-07-08"), by="days") +runDays <- seq(as.Date("2021-09-26"), as.Date("2023-07-10"), by="days") FORCE = FALSE ## should we overwrite previously completed runs +GEN_FIGURES = FALSE ## should we generate figures? ## forecast configuration -projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/" ## main folder +projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/" ## main folder set = readRDS(file.path(projectdir,"pecan.RDS")) pecanhome = "/home/dietze/pecan" ## directory where pecan is installed @@ -18,7 +19,7 @@ source(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/minio_sec #minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "password") minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu") minio_port <- Sys.getenv("MINIO_PORT", "9000") -minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none") +minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-v2") ################# Initial configuration (one time): ############################ ## * update local paths (uncomment, run once, recomment) @@ -56,7 +57,7 @@ for (s in seq_along(runDays)) { ## is there output there? prev.out = dir(file.path(prev,"out"),full.names = TRUE) if(length(prev.out)>0){ - prev.files = sapply(as.list(prev.out),function(x){length(dir(x,pattern = "*.nc"))}) + prev.files = sapply(as.list(prev.out),function(x){length(dir(x,pattern = "*.nc$"))}) if(min(prev.files)>0){ ######### RUN FORECAST ######## @@ -76,7 +77,70 @@ for (s in seq_along(runDays)) { break } -} + ########################################## + ## Ensure Sipnet.out files are merged ## + ########################################## + this.out = dir(file.path(now,"out"),full.names = TRUE) + for(i in seq_along(this.out)){ + #this.out[i] = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/tmp" + ### testing specific hack ### + foo = dir(this.out[i],pattern = "foo.nc",full.names = TRUE) + unlink(foo) + ## get files + ncf = dir(this.out[i],pattern = "*.nc$",full.names = TRUE) + out = dir(this.out[i],pattern = "*.out$",full.names = TRUE) + ## CASE 1: files weren't merged properly + if(length(out)>1 & length(ncf) > 0 ){ + ## grab sipnet output + raw = list() + for(j in seq_along(out)){ + raw[[j]] = read.delim(out[j], header = T, skip = 1, sep = "") + #if(length(out) > 1 & j == 1) raw[[j]][["day"]] = raw[[j]][["day"]] - 1 ## one-time testing hack + } + raw = dplyr::bind_rows(raw) + tvals <- raw[["day"]] + raw[["time"]] / 24 + tdate = tvals + raw[["year"]]*1000 ## composite value for sorting, not a correct datetime + raw = raw[order(tdate),] + y = min(raw[["year"]]) + dates <- PEcAn.SIPNET::sipnet2datetime(tvals,base_year = y) + ## grab netcdf + nc = ncdf4::nc_open(ncf) + ndates = ncdf4::ncvar_get(nc,varid = "time") + ncdf4::nc_close(nc) + if(min(tvals) == min(ndates) & max(tvals) == max(ndates)) next ## check if date range matches + ## if not, clean up and try again + unlink(out) + unlink(ncf) + write.table("header",file=out[length(out)],col.names = F,row.names = F) + write.table(raw,file=out[length(out)],col.names = TRUE,row.names = FALSE,append = TRUE) + #out2 = read.delim(out[length(out)], header = T, skip = 1, sep = "") ## check that things match + PEcAn.SIPNET::model2netcdf.SIPNET(outdir = this.out[i], + sitelat=set[[1]]$run$site$lat, ## works for HF but need to generalize this hard coding to match site + sitelon=set[[1]]$run$site$lat, + start_date = min(dates), + end_date = max(dates), + revision = "ssr" + ) + } else { + ## CASE 1: single output, not converted + if(length(out) == 1 & length(ncf) == 0){ + PEcAn.SIPNET::model2netcdf.SIPNET(outdir = this.out[i], + sitelat=set[[1]]$run$site$lat, ## works for HF but need to generalize this hard coding to match site + sitelon=set[[1]]$run$site$lat, + start_date = runDays[s], + end_date = as.Date(runDays[s]) + lubridate::days(35), + revision = "ssr", + delete.raw = FALSE, + conflict = TRUE + ) + } + } + } ## end loop over files + +} ## end loop over dates + + + ########################################## ## push output to minio in EFI standard ## @@ -93,7 +157,7 @@ minio_uri_public <- function(...) { sprintf(template, minio_path(...), minio_host, ":", minio_port) } -runDays <- seq(as.Date("2023-06-22"), as.Date("2023-07-10"), by="days") +#runDays <- seq(as.Date("2023-06-22"), as.Date("2023-07-10"), by="days") ## loop over dates FORCE = FALSE @@ -131,11 +195,17 @@ for (s in seq_along(runDays)) { start_date = runDays[s]) } out = dplyr::bind_rows(out) + if(GEN_FIGURES){ + ylim = range(out$GPP,na.rm=TRUE) + time = sort(unique(out$time_bounds)) + plot(out$time_bounds,out$GPP,ylim=ylim,pch=".") ## GPP is UTC, has wrong shape, and is only one day + } if(!is.numeric(nrow(out)) | nrow(out) == 0) next ## don't insert empty days into minio - out = out %>% relocate(parameter) %>% - relocate(site_id) %>% - relocate(time_bounds) %>% rename(datetime=time_bounds) %>% - relocate(reference_datetime) + out = out %>% dplyr::relocate(parameter) %>% + dplyr::relocate(site_id) %>% + dplyr::relocate(time_bounds) %>% + dplyr::rename(datetime=time_bounds) %>% + dplyr::relocate(reference_datetime) out = tidyr::pivot_longer(out,5:ncol(out),names_to = "variable",values_to = "prediction") ## push to container in parquet format diff --git a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R index 3cc2c17a6b5..1cf4d6f90d4 100644 --- a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R +++ b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R @@ -117,9 +117,10 @@ for (s in seq_along(runDays)) { ## did we write this run to minio already? if(!FORCE){ ## if not overwriting - ens = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% - dplyr::filter(lubridate::as_datetime(reference_datetime) == runDays[s]) %>% - dplyr::distinct(parameter) %>% dplyr::collect() + ens = arrow::open_dataset(minio_uri_public(), format = "parquet" ) |> + dplyr::filter(lubridate::as_datetime(reference_datetime) == runDays[s]) |> + dplyr::distinct(parameter) |> + dplyr::collect() if(length(ens$parameter)>0) { print(paste("skipping",length(ens$parameter))) next @@ -140,14 +141,15 @@ for (s in seq_along(runDays)) { } out = dplyr::bind_rows(out) if(!is.numeric(nrow(out)) | nrow(out) == 0) next ## don't insert empty days into minio - out = out %>% relocate(parameter) %>% - relocate(site_id) %>% - relocate(time_bounds) %>% rename(datetime=time_bounds) %>% - relocate(reference_datetime) + out = out %>% dplyr::relocate(parameter) |> + dplyr::relocate(site_id) |> + dplyr::relocate(time_bounds) |> + dplyr::rename(datetime=time_bounds) |> + dplyr::relocate(reference_datetime) out = tidyr::pivot_longer(out,5:ncol(out),names_to = "variable",values_to = "prediction") ## push to container in parquet format - out %>% dplyr::group_by(reference_datetime) %>% arrow::write_dataset(minio_uri(),format="parquet") + out |> dplyr::group_by(reference_datetime) |> arrow::write_dataset(minio_uri(),format="parquet") } diff --git a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R index d54acb9e19b..01fa1dc7272 100755 --- a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R +++ b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R @@ -41,7 +41,11 @@ option_list = list(optparse::make_option("--start.date", args <- optparse::parse_args(optparse::OptionParser(option_list = option_list)) #args$start.date = "2022-05-18 00:00:00" #args$prev = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FOF2022-05-17/" + +## set dates start.date = lubridate::as_date(args$start.date) +prev.date = basename(args$prev) |> substr(start=4,stop=13) |> lubridate::as_date() +end.date <- start.date + lubridate::days(35) #------------------------------------------------------------------------------------------------ #------------------------------------------ Preparing the pecan xml ----------------------------- @@ -49,21 +53,22 @@ start.date = lubridate::as_date(args$start.date) restart <- list() restart$filepath <- args$prev set = readRDS(args$settings) - -#set met.start & met.end -end.date <- start.date + lubridate::days(35) -sda.start = start.date + lubridate::days(1) projectdir = set$outdir # -------------------------------------------------------------------------------------------------- #---------------------------------------------- NA DATA ------------------------------------- # -------------------------------------------------------------------------------------------------- -#initialize obs.mean/cov NAs +#initialize obs.mean/cov NAs & dates +# note: +# for t=1: SDA doesn’t use metSplit, uses met.start, met.end +# for iterative forecast restart, this is PREV +# for t>1: metSplit uses obs.times t-1 -> t site.ids <- PEcAn.settings::papply(set,function(x)(x$run$site$id)) %>% unlist() %>% as.character() nsite = length(site.ids) -NAdata = data.frame(date = c(rep(start.date,nsite),rep(sda.start,nsite)), +NAdata = data.frame(date = c(rep(format(start.date, "%Y-%m-%d %H:%M:%S", tz = "GMT"),nsite), + rep(format(end.date, "%Y-%m-%d %H:%M:%S", tz = "GMT"),nsite)), ##coerce to datetimes to avoid pumpkin rule site_id = rep(site.ids,times=2), data = rep(NA,nsite*2)) obs.mean <- obs.cov <- split(NAdata, NAdata$date) @@ -95,7 +100,7 @@ obs.cov <- purrr::map( ) %>% stats::setNames(date.obs) #add start.cut to restart list -restart$start.cut <- start.date +restart$start.cut <- as_datetime(start.date) restart$start.cut <- format(restart$start.cut, "%Y-%m-%d %H:%M:%S", tz = "GMT") @@ -106,8 +111,8 @@ restart$start.cut <- format(restart$start.cut, "%Y-%m-%d %H:%M:%S", tz = "GMT") for(s in seq_along(set)){ set[[s]]$run$start.date = start.date set[[s]]$run$end.date = end.date - set[[s]]$run$site$met.start = start.date - set[[s]]$run$site$met.end = end.date + set[[s]]$run$site$met.start = prev.date ## time period for t == 1, prev forecast + set[[s]]$run$site$met.end = start.date } # Setting dates in assimilation tags - This will help with preprocess split in SDA code @@ -143,7 +148,8 @@ met_paths <- list.files(path = file.path(path, start.date), full.names = TRUE, p #met_paths <- list.files(path = file.path(settings$run$settings.1$inputs$met$path, start.date), full.names = TRUE, pattern = ".clim") if(purrr::is_empty(met_paths)){ print(paste("SKIPPING: NO MET FOR",start.date)) - cat(as.character(start.date),sep="\n",file=file.path(dirname(set$outdir),"NO_MET"),append=TRUE) ## add to list of dates missing met +## cat(as.character(start.date),sep="\n",file=file.path(dirname(set$outdir),"NO_MET"),append=TRUE) ## add to list of dates missing met +## TODO: for forecast, don't update NO_MET as files may still arrive; only update for reanalysis stop_quietly() } met_paths = as.list(met_paths) @@ -158,10 +164,12 @@ prev_run_ids = list.files(file.path(restart$filepath, "out")) run_id = as.data.frame(strsplit(prev_run_ids,"-")) %>% t() colnames(run_id) <- c("pre","ens","site") rownames(run_id) <- NULL -run_id = as.data.frame(run_id) %>% mutate(folder=prev_run_ids,id = paste0("id",.data$ens)) %>% group_by(site) +run_id = as.data.frame(run_id) %>% + dplyr::mutate(folder=prev_run_ids,id = paste0("id",.data$ens)) %>% + dplyr::group_by(site) ###settings$runs$id = run_id for(s in seq_along(set)){ - site_run_id = run_id |> filter(site == as.list(set$run[[s]]$site$id)[[1]]) + site_run_id = run_id |> dplyr::filter(site == as.list(set[[s]]$run$site$id)[[1]]) set[[s]]$run$id = as.list(site_run_id$folder) names(set[[s]]$run$id) = site_run_id$id } diff --git a/modules/assim.sequential/inst/hf_landscape/07_SMAP.R b/modules/assim.sequential/inst/hf_landscape/07_SMAP.R index 4cd9c3c2841..67f48d786f1 100644 --- a/modules/assim.sequential/inst/hf_landscape/07_SMAP.R +++ b/modules/assim.sequential/inst/hf_landscape/07_SMAP.R @@ -1,4 +1,6 @@ +#!/usr/bin/env Rscript ## SMAP data prep cron script +.libPaths(c("/projectnb/dietzelab/dietze/test-pecan/R/library",.libPaths())) #### Libraries and configs #BiocManager::install("rhdf5") @@ -184,7 +186,7 @@ for(t in seq_along(status$date)){ ## determine what sites have already been processed SMAP_done = SMAP_CSV[SMAP_CSV$site_id %in% site.id & SMAP_CSV$date == status$date[t],] - if(nrow(SMAP_done) > 0){ + if(nrow(SMAP_done) > 0 & !all(is.na(SMAP_done))){ site.todo = !(site.id %in% SMAP_done$site_id) if(all(!site.todo)){ next } ## done, skip to the next year } else { diff --git a/modules/assim.sequential/man/metSplit.Rd b/modules/assim.sequential/man/metSplit.Rd index 63758ae5ddd..18c99376787 100644 --- a/modules/assim.sequential/man/metSplit.Rd +++ b/modules/assim.sequential/man/metSplit.Rd @@ -14,7 +14,8 @@ metSplit( t, nens, restart_flag = FALSE, - my.split_inputs + my.split_inputs, + outpath = NULL ) } \arguments{ @@ -37,6 +38,8 @@ metSplit( \item{restart_flag}{TRUE/FALSE taken from restart arguement} \item{my.split_inputs}{generated by sda.enkf_MultiSite ex. split_inputs.SIPNET} + +\item{outpath}{if specified, write output to a new directory. Default NULL writes back to the directory being read} } \value{ input.split object with split met filepaths From 44e58ee3333198bd4942dc0a2c36c5b020550b41 Mon Sep 17 00:00:00 2001 From: Mike Dietze Date: Wed, 26 Jul 2023 16:09:38 -0400 Subject: [PATCH 5/7] forecast: check on sipnet -> nc conversion; SDA: tweaks on how parameter ensembles are set and updated. --- .../assim.sequential/R/sda.enkf_MultiSite.R | 44 +++++++++---------- .../05C_SDA_Workflow_NA.forecast.R | 21 +++++++++ 2 files changed, 43 insertions(+), 22 deletions(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index a0f02a29fee..daf07d66f75 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -334,29 +334,29 @@ sda.enkf.multisite <- function(settings, # weight matrix wt.mat <- matrix(NA, nrow = nens, ncol = nt) # Reading param samples------------------------------- - #create params object using samples generated from TRAITS functions - if(restart_flag){ - new.params <- new.params - }else{ - if(!file.exists(file.path(settings$outdir, "samples.Rdata"))) PEcAn.logger::logger.severe("samples.Rdata cannot be found. Make sure you generate samples by running the get.parameter.samples function before running SDA.") - #Generate parameter needs to be run before this to generate the samples. This is hopefully done in the main workflow. - if(is.null(ensemble.samples)){ - load(file.path(settings$outdir, "samples.Rdata")) - } - #reformatting params - new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) + #create params object using samples generated from TRAITS functions + if(restart_flag){ + new.params <- new.params + }else{ + if(!file.exists(file.path(settings$outdir, "samples.Rdata"))) PEcAn.logger::logger.severe("samples.Rdata cannot be found. Make sure you generate samples by running the get.parameter.samples function before running SDA.") + #Generate parameter needs to be run before this to generate the samples. This is hopefully done in the main workflow. + if(is.null(ensemble.samples)){ + load(file.path(settings$outdir, "samples.Rdata")) } - #sample met ensemble members - #TODO: incorporate Phyllis's restart work - # sample all inputs specified in the settings$ensemble not just met - inputs <- PEcAn.settings::papply(conf.settings,function(setting) { - PEcAn.uncertainty::input.ens.gen( - settings = setting, - input = "met", - method = setting$ensemble$samplingspace$met$method, - parent_ids = NULL - ) - }) + #reformatting params + new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens) + } + #sample met ensemble members + #TODO: incorporate Phyllis's restart work + # sample all inputs specified in the settings$ensemble not just met + inputs <- PEcAn.settings::papply(conf.settings,function(setting) { + PEcAn.uncertainty::input.ens.gen( + settings = setting, + input = "met", + method = setting$ensemble$samplingspace$met$method, + parent_ids = NULL + ) + }) ###------------------------------------------------------------------------------------------------### ### loop over time ### ###------------------------------------------------------------------------------------------------### diff --git a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R index 1cf4d6f90d4..6d3fdbe5ef8 100644 --- a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R +++ b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R @@ -78,6 +78,27 @@ for (s in seq_along(runDays)) { stderr="stderr.log") print(msg) + + ## check that model2netcdf was run + this.out = dir(file.path(now,"out"),full.names = TRUE) + for(i in seq_along(this.out)){ + ## get files + ncf = dir(this.out[i],pattern = "*.nc$",full.names = TRUE) + out = dir(this.out[i],pattern = "*.out$",full.names = TRUE) + if(length(out) >= 1 & length(ncf) == 0){ + PEcAn.SIPNET::model2netcdf.SIPNET(outdir = this.out[i], + sitelat=set[[1]]$run$site$lat, ## works for HF but need to generalize this hard coding to match site + sitelon=set[[1]]$run$site$lat, + start_date = runDays[s], + end_date = as.Date(runDays[s]) + lubridate::days(35), + revision = "ssr", + delete.raw = FALSE, + conflict = TRUE + ) + } + } + + } } else { break } } else { From 27ba92d532df8da65404bc7d61f25cd6aa171c65 Mon Sep 17 00:00:00 2001 From: Mike Dietze Date: Wed, 26 Jul 2023 16:16:07 -0400 Subject: [PATCH 6/7] switch to native pipes --- modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R b/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R index 4bace0a4de4..f8d9175e7c4 100644 --- a/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R +++ b/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R @@ -63,7 +63,7 @@ PEcAn2EFI <- function(outdir = NULL, ne = NULL, site_id = NULL, start_date=NULL, ensemble.output[[row]]$time_bounds = tod ## convert to data frame - ensemble.output[[row]] = as.data.frame(ensemble.output[[row]]) %>% + ensemble.output[[row]] = as.data.frame(ensemble.output[[row]]) |> dplyr::mutate( parameter = row, reference_datetime = lubridate::as_datetime(start_date), @@ -99,7 +99,7 @@ PEcAn2EFI.ens <- function(outdir,run.id,start_date,end_date = NULL){ ensemble.output$time_bounds = tod ## convert to data frame - ensemble.output = as.data.frame(ensemble.output) %>% + ensemble.output = as.data.frame(ensemble.output) |> dplyr::mutate( parameter = ens.id, reference_datetime = lubridate::as_datetime(start_date), From 7c39df4e9f8cc037dcfa90e3d7a2736f5770e7ff Mon Sep 17 00:00:00 2001 From: Michael Dietze Date: Wed, 13 Mar 2024 09:04:47 -0400 Subject: [PATCH 7/7] Update base/workflow/R/runModule.run.write.configs.R Co-authored-by: Chris Black --- base/workflow/R/runModule.run.write.configs.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/base/workflow/R/runModule.run.write.configs.R b/base/workflow/R/runModule.run.write.configs.R index 1bddef506f1..200677b5cf4 100644 --- a/base/workflow/R/runModule.run.write.configs.R +++ b/base/workflow/R/runModule.run.write.configs.R @@ -12,7 +12,11 @@ runModule.run.write.configs <- function(settings, overwrite = TRUE, use.existing PEcAn.logger::logger.warn("Existing runs.txt file will be removed.") unlink(file.path(settings$rundir, "runs.txt")) } - return(PEcAn.settings::papply(settings, runModule.run.write.configs, overwrite = overwrite,use.existing.samples=use.existing.samples)) + return(PEcAn.settings::papply( + settings, + runModule.run.write.configs, + overwrite = overwrite, + use.existing.samples = use.existing.samples)) } else if (PEcAn.settings::is.Settings(settings)) { write <- settings$database$bety$write # double check making sure we have method for parameter sampling