From 19af737dbcfb9c77f09fcbad8132f3138528718e Mon Sep 17 00:00:00 2001
From: Mike Dietze <dietze@bu.edu>
Date: Mon, 19 Jun 2023 10:41:58 -0400
Subject: [PATCH 1/7] update reanalysis to generalizations in forecast,

---
 .../05B_SDA_Workflow_NA.reanalysis.R          | 72 +++++++++++-----
 .../05C_SDA_Workflow_NA.forecast.R            | 85 +++++++++++++------
 .../inst/hf_landscape/05_SDA_Workflow_NA.R    | 13 +--
 .../inst/hf_landscape/06_cron.Rmd             | 14 +++
 4 files changed, 132 insertions(+), 52 deletions(-)

diff --git a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
index 66ea9181fda..a0af1b23f4b 100644
--- a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
+++ b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
@@ -1,22 +1,57 @@
 ## Reanalysis helper script around 05_SDA_Workflow_NA
+
+###########  CONFIGURATION ###################
+
 ## original start date was 2022-05-17
-runDays <- seq(as.Date("2022-05-20"), as.Date("2023-05-11"), by="days")
+runDays <- seq(as.Date("2023-05-30"), as.Date("2023-06-13"), by="days")
 FORCE = FALSE  ## should we overwrite previously completed runs
 
+## forecast configuration
+projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/" ## main folder
+set = readRDS(file.path(projectdir,"pecan.RDS"))
+pecanhome = "/home/dietze/pecan"  ## directory where pecan is installed 
+
+## S3 bucket for output
+source(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/minio_secrets.R"))
+## minio_secrets contains the following:
+#minio_key <- Sys.getenv("MINIO_ACCESS_KEY", "username")
+#minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "password")
+minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu")
+minio_port <- Sys.getenv("MINIO_PORT", "9000")
+minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none")
+
+################# Initial configuration (one time): ############################
+##  * update local paths (uncomment, run once, recomment)
+# set$outdir = projectdir
+# for(i in seq_along(set$pfts)){
+#   set$pfts[[i]]$posterior.files = file.path(projectdir,"pfts",basename(set$pfts[[i]]$posterior.files))
+# }
+# set$model$binary = file.path(projectdir,"model",basename(set$model$binary))
+# set$model$jobtemplate = file.path(projectdir,"template.job")
+# for(i in seq_along(set$run)){
+#   set$run[[i]]$inputs$pft.site = file.path(projectdir,"site_pft.csv")
+#   set$run[[i]]$inputs$poolinitcond$path = file.path(projectdir,"IC",basename(set$run[[i]]$inputs$poolinitcond$path))
+#   set$run[[i]]$inputs$met$path = file.path(projectdir,"GEFS")
+# }
+# saveRDS(set,file=file.path(projectdir,"pecan.RDS"))  
+##  * update set$database$bety$host
+##  * set up separate cron jobs for input prep (met, constraints)
+
+###########  RUN REANALYSIS ####################
 for (s in seq_along(runDays)) {
   ## did we do this run already?
-  now  = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FNA",runDays[s])
+  now  = file.path(projectdir,paste0("FNA",runDays[s]))
   print(now)
   this.out = dir(file.path(now,"out"),full.names = TRUE)
-  if(length(this.out) > 0 & !FORCE) break
+  if(length(this.out) > 0 & !FORCE) next
   
   ## find previous run
   yesterday = runDays[s] - lubridate::days(1)
-  NoMet = read.csv("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/NO_MET",header=FALSE)[,1]
+  NoMet = read.csv(file.path(projectdir,"NO_MET"),header=FALSE)[,1]
   while(as.character(yesterday) %in% NoMet & yesterday - runDays[s] < lubridate::days(35) ){
     yesterday = yesterday - lubridate::days(1)
   }
-  prev = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FNA",yesterday,"/")
+  prev = file.path(projectdir,paste0("FNA",yesterday,"/"))
   if(dir.exists(prev)){
     ## is there output there?
     prev.out = dir(file.path(prev,"out"),full.names = TRUE)
@@ -25,8 +60,10 @@ for (s in seq_along(runDays)) {
       if(min(prev.files)>0){
         
         #########   RUN FORECAST   ########
-        msg = system2("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R",
-                      paste("--start.date",runDays[s],"--prev",prev),
+        msg = system2(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R"),
+                      paste("--start.date",runDays[s],
+                            "--prev",prev,
+                            "--settings",file.path(projectdir,"pecan.RDS")),
                       wait=TRUE,
                       stdout="stdout.log",
                       stderr="stderr.log")
@@ -44,17 +81,7 @@ for (s in seq_along(runDays)) {
 ##########################################
 ## push output to minio in EFI standard ##
 ##########################################
-
-## minio settings and helper functions
-
-source("~/pecan/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R")
-source("~/pecan/modules/assim.sequential/inst/hf_landscape/minio_secrets.R")
-## minio_secrets contains the following:
-#minio_key <- Sys.getenv("MINIO_ACCESS_KEY", "username")
-#minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "password")
-minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu")
-minio_port <- Sys.getenv("MINIO_PORT", "9000")
-minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none")
+source(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R"))
 # helper function for minio URIs
 minio_path <- function(...) paste(minio_arrow_bucket, ..., sep = "/")
 minio_uri <- function(...) {
@@ -66,13 +93,13 @@ minio_uri_public <- function(...) {
   sprintf(template, minio_path(...), minio_host, ":", minio_port)
 }
 
-runDays <- seq(as.Date("2022-05-18"), as.Date("2023-05-11"), by="days")
+runDays <- seq(as.Date("2022-11-25"), as.Date("2023-06-13"), by="days")
 
 ## loop over dates
 FORCE = FALSE
 for (s in seq_along(runDays)) {
   ## did we do this run already?
-  now  = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FNA",runDays[s])
+  now  = file.path(projectdir,paste0("FNA",runDays[s]))
   print(now)
   this.out = dir(file.path(now,"out"),full.names = TRUE)
   if(length(this.out) == 0){
@@ -115,3 +142,8 @@ for (s in seq_along(runDays)) {
   out %>% dplyr::group_by(reference_datetime) %>% arrow::write_dataset(minio_uri(),format="parquet")
   
 }
+
+## list set of uploaded dates
+submitted = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% 
+  dplyr::distinct(reference_datetime) %>% dplyr::collect() %>% as.vector()
+
diff --git a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
index dd6e15b88b8..34eaceb7976 100644
--- a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
+++ b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
@@ -1,13 +1,42 @@
 ## Forecast helper script around 05_SDA_Workflow_NA
-runDays = Sys.Date()
+
+## forecast configuration
+projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/" ## main folder
+set = readRDS(file.path(projectdir,"pecan.RDS"))
+pecanhome = "/home/dietze/pecan"  ## directory where pecan is installed 
+
+## S3 bucket for output
+minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu")
+minio_port <- Sys.getenv("MINIO_PORT", "9000")
+minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none")
+
+################# Initial configuration (one time): ############################
+##  * update local paths (uncomment, run once, recomment)
+  # set$outdir = projectdir
+  # for(i in seq_along(set$pfts)){
+  #   set$pfts[[i]]$posterior.files = file.path(projectdir,"pfts",basename(set$pfts[[i]]$posterior.files))
+  # }
+  # set$model$binary = file.path(projectdir,"model",basename(set$model$binary))
+  # set$model$jobtemplate = file.path(projectdir,"template.job")
+  # for(i in seq_along(set$run)){
+  #   set$run[[i]]$inputs$pft.site = file.path(projectdir,"site_pft.csv")
+  #   set$run[[i]]$inputs$poolinitcond$path = file.path(projectdir,"IC",basename(set$run[[i]]$inputs$poolinitcond$path))
+  #   set$run[[i]]$inputs$met$path = file.path(projectdir,"GEFS")
+  # }
+  # saveRDS(set,file=file.path(projectdir,"pecan.RDS"))  
+##  * update set$database$bety$host
+##  * set up separate cron jobs for input prep (met, constraints)
+
+###################  TODAY'S SETTINGS  ########################
+runDays = Sys.Date()  ## for test case set this to 2022-05-22
 FORCE = FALSE  ## should we overwrite previously completed runs
 
 ## check for missed days
 start_date = runDays
 success = FALSE
-NoMet = read.csv("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/NO_MET",header=FALSE)[,1]
+NoMet = read.csv(file.path(projectdir,"NO_MET"),header=FALSE)[,1]
 while(!success & runDays - start_date < lubridate::days(35) ){
-  this.out = dir(file.path(paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/FNA",start_date),"out"),full.names = TRUE)
+  this.out = dir(file.path(paste0(projectdir,"/FNA",start_date),"out"),full.names = TRUE)
   if(length(this.out) > 0 & !FORCE) { ## this day ran successfully
     success = TRUE
     break
@@ -16,21 +45,20 @@ while(!success & runDays - start_date < lubridate::days(35) ){
 }
 runDays = seq(from=start_date,to=runDays,by="1 day")
 
-## run forecast
+#####################  run forecast  #####################
 for (s in seq_along(runDays)) {
   ## did we do this run already?
-  now  = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/FNA",runDays[s])
+  now  = paste0(projectdir,"/FNA",runDays[s])
   print(now)
   this.out = dir(file.path(now,"out"),full.names = TRUE)
-  if(length(this.out) > 0 & !FORCE) break
+  if(length(this.out) > 0 & !FORCE) next
   
   ## find previous run
   yesterday = runDays[s] - lubridate::days(1)
-  NoMet = read.csv("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/NO_MET",header=FALSE)[,1]
   while(as.character(yesterday) %in% NoMet & yesterday - runDays[s] < lubridate::days(35) ){
     yesterday = yesterday - lubridate::days(1)
   }
-  prev = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/FNA",yesterday)
+  prev = paste0(projectdir,"/FNA",yesterday)
   if(dir.exists(prev)){
     ## is there output there?
     prev.out = dir(file.path(prev,"out"),full.names = TRUE)
@@ -39,8 +67,10 @@ for (s in seq_along(runDays)) {
       if(min(prev.files)>0){
         
         #########   RUN FORECAST   ########
-        msg = system2("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R",
-                      paste("--start.date",runDays[s],"--prev",prev),
+        msg = system2(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R"),
+                      paste("--start.date",runDays[s],
+                            "--prev",prev,
+                            "--settings",file.path(projectdir,"pecan.RDS")),
                       wait=TRUE,
                       stdout="stdout.log",
                       stderr="stderr.log")
@@ -48,8 +78,8 @@ for (s in seq_along(runDays)) {
         
       }
     } else { break }
-  } else {mc("ls minio")
-    ## previous run didn't occur
+  } else {
+    print("previous run didn't occur")
     break
   }
   
@@ -60,14 +90,7 @@ for (s in seq_along(runDays)) {
 ##########################################
 
 ## minio settings and helper functions
-source("~/pecan/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R")
-source("~/pecan/modules/assim.sequential/inst/hf_landscape/minio_secrets.R")
-## minio_secrets contains the following:
-#minio_key <- Sys.getenv("MINIO_ACCESS_KEY", "username")
-#minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "password")
-minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu")
-minio_port <- Sys.getenv("MINIO_PORT", "9000")
-minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none")
+source(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R"))
 # helper function for minio URIs
 minio_path <- function(...) paste(minio_arrow_bucket, ..., sep = "/")
 minio_uri <- function(...) {
@@ -82,7 +105,7 @@ minio_uri_public <- function(...) {
 ## loop over dates
 for (s in seq_along(runDays)) {
   ## did we do this run already?
-  now  = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test01/FNA",runDays[s])
+  now  = paste0(projectdir,"/FNA",runDays[s])
   print(now)
   this.out = dir(file.path(now,"out"),full.names = TRUE)
   if(length(this.out) == 0){
@@ -91,12 +114,14 @@ for (s in seq_along(runDays)) {
   } 
   
   ## did we write this run to minio already?
-  ens = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% 
-    dplyr::filter(lubridate::as_datetime(reference_datetime) == runDays[s]) %>%  
-    dplyr::distinct(parameter) %>% dplyr::collect()
-  if(length(ens$parameter)>0) {
-    print(paste("skipping",length(ens$parameter)))
-    next
+  if(!FORCE){  ## if not overwriting
+    ens = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% 
+      dplyr::filter(lubridate::as_datetime(reference_datetime) == runDays[s]) %>%  
+      dplyr::distinct(parameter) %>% dplyr::collect()
+    if(length(ens$parameter)>0) {
+      print(paste("skipping",length(ens$parameter)))
+      next
+    }
   }
   
   ## identify runs in the output folder
@@ -112,6 +137,12 @@ for (s in seq_along(runDays)) {
                                    start_date = runDays[s])
   }
   out = dplyr::bind_rows(out)
+  if(!is.numeric(nrow(out)) | nrow(out) == 0) next  ## don't insert empty days into minio
+  out = out %>% relocate(parameter) %>% 
+    relocate(site_id) %>%
+    relocate(time_bounds) %>% rename(datetime=time_bounds) %>%
+    relocate(reference_datetime)
+  out = tidyr::pivot_longer(out,5:ncol(out),names_to = "variable",values_to = "prediction")
   
   ## push to container in parquet format
   out %>% dplyr::group_by(reference_datetime) %>% arrow::write_dataset(minio_uri(),format="parquet")
diff --git a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
index 6c73d37cdb2..0a4e183e14a 100755
--- a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
+++ b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
@@ -32,7 +32,8 @@ option_list = list(optparse::make_option("--start.date",
                                          default = Sys.Date(),
                                          type="character"),
                    optparse::make_option("--prev",
-                                         default = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FNA",Sys.Date()-lubridate::days(1)),
+                                         type="character"),
+                   optparse::make_option("--settings",
                                          type="character")
                    )
 args <- optparse::parse_args(optparse::OptionParser(option_list = option_list))
@@ -45,18 +46,18 @@ start.date = lubridate::as_date(args$start.date)
 #------------------------------------------------------------------------------------------------
 restart <- list()
 restart$filepath <- args$prev
-set = readRDS("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/pecan.RDS")
+set = readRDS(args$settings)
 
 #set met.start & met.end
 end.date <- start.date + lubridate::days(35)
 sda.start = start.date + lubridate::days(1)
+projectdir = set$outdir
 
 # --------------------------------------------------------------------------------------------------
 #---------------------------------------------- NA DATA -------------------------------------
 # --------------------------------------------------------------------------------------------------
 
 #initialize obs.mean/cov NAs
-## TODO: Alexis's version had two dates, need to take a closer list at what dates these should be set to
 site.ids <- papply(set,function(x)(x$run$site$id)) %>% unlist() %>% as.character()
 nsite = length(site.ids)
 
@@ -135,7 +136,9 @@ dir.create(set$modeloutdir)
 dir.create(set$pfts$pft$outdir)
 
 #manually add in clim files 
-met_paths <- list.files(path = file.path("/projectnb/dietzelab/ahelgeso/NOAA_met_data_CH1/noaa_clim/HARV", start.date), full.names = TRUE, pattern = ".clim")
+path = "/projectnb/dietzelab/ahelgeso/NOAA_met_data_CH1/noaa_clim/HARV/" ## hack
+met_paths <- list.files(path = file.path(path, start.date), full.names = TRUE, pattern = ".clim")
+#met_paths <- list.files(path = file.path(settings$run$settings.1$inputs$met$path, start.date), full.names = TRUE, pattern = ".clim")
 if(is_empty(met_paths)){
   print(paste("SKIPPING: NO MET FOR",start.date))
   cat(as.character(start.date),sep="\n",file=file.path(dirname(set$outdir),"NO_MET"),append=TRUE) ## add to list of dates missing met
@@ -162,7 +165,7 @@ for(s in seq_along(set)){
 }
 
 ## job.sh
-set$model$jobtemplate = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/template.job"
+if(is.null(set$model$jobtemplate)) set$model$jobtemplate = file.path(projectdir,"template.job")
 
 #save restart object
 save(restart, next.oldir, args, file = file.path(set$outdir, "restart.Rdata"))
diff --git a/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd b/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd
index 31041cde112..62ee2efec99 100644
--- a/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd
+++ b/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd
@@ -5,7 +5,21 @@ output: html_notebook
 
 ```{r}
 library(cronR)
+```
+
+
+## Hello World
+```{r}
+
+```
 
+## Met forecast (taking over for Alexis)
+```{r}
+
+```
+
+
+```{r}
 ### NO ASSIMILATION FORECAST
 cmd <- cronR::cron_rscript("~/pecan/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R")
 cron_add(command = cmd, frequency = 'daily', at='4AM', id = 'HARV_FNA', description = "Harvard Forest landscape forecast, No Assimilated data")

From 2b86b1b7e0321846eb0c9d1502f525df4e7f6fb6 Mon Sep 17 00:00:00 2001
From: Mike Dietze <dietze@bu.edu>
Date: Mon, 10 Jul 2023 10:44:12 -0400
Subject: [PATCH 2/7] tweaks to get HF landscape forecast restarted and cron
 jobs working.

---
 .../R/Analysis_sda_multiSite.R                | 93 ++++++++++---------
 .../assim.sequential/R/sda.enkf_MultiSite.R   | 13 +--
 .../05B_SDA_Workflow_NA.reanalysis.R          |  6 +-
 .../05C_SDA_Workflow_NA.forecast.R            |  2 +
 .../inst/hf_landscape/05_SDA_Workflow_NA.R    | 21 +++--
 .../inst/hf_landscape/06_cron.Rmd             | 29 +++++-
 .../inst/hf_landscape/PEcAn2EFI.R             |  5 +-
 modules/assim.sequential/man/MCMC_function.Rd |  4 +-
 modules/data.atmosphere/NAMESPACE             |  1 +
 .../R/download_noaa_gefs_efi.R                | 12 ++-
 ..._GEFS_EFI.Rd => download.NOAA_GEFS_EFI.Rd} | 12 ++-
 scripts/HARV_metdownload_efi.R                | 82 +++++++++-------
 12 files changed, 173 insertions(+), 107 deletions(-)
 rename modules/data.atmosphere/man/{download_NOAA_GEFS_EFI.Rd => download.NOAA_GEFS_EFI.Rd} (70%)
 mode change 100644 => 100755 scripts/HARV_metdownload_efi.R

diff --git a/modules/assim.sequential/R/Analysis_sda_multiSite.R b/modules/assim.sequential/R/Analysis_sda_multiSite.R
index 4b4c67f92c4..bdba0fd3e92 100644
--- a/modules/assim.sequential/R/Analysis_sda_multiSite.R
+++ b/modules/assim.sequential/R/Analysis_sda_multiSite.R
@@ -313,8 +313,9 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){
                     ceiling(elements.W.Data/length(var.names))],
           settings$state.data.assimilation$scalef %>% as.numeric()
         )
-      }
-
+  }
+  if(is.null(aqq)) PEcAn.logger::logger.severe("aqq undefined")
+  
   ### create matrix the describes the support for each observed state variable at time t
   interval <- matrix(NA, length(unlist(obs.mean[[t]])), 2)
   
@@ -346,7 +347,7 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){
   #### from the interval matrix
   y.ind <- as.numeric(Y > interval[, 1])
   y.censored <- as.numeric(ifelse(Y > interval[, 1], Y, 0))
-  data <- list(elements.W.Data = elements.W.Data,
+  MCMC_data <- list(elements.W.Data = elements.W.Data,
                X = X,
                Pf = Pf,
                aqq = aqq,
@@ -359,8 +360,14 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){
                nitr.GEF = extraArg$nitr.GEF,
                nburnin = extraArg$nburnin,
                nthin = extraArg$nthin,
-               monitors = c("Xall", "qq"))
-  outputs <- furrr::future_map(lapply(rep("data", as.numeric(settings$state.data.assimilation$chains)), get), MCMC_function)
+               monitors = c("Xall", "qq"),
+               t=t)
+  tmp = list()
+  for(i in seq_len(as.numeric(settings$state.data.assimilation$chains))){
+    tmp[[i]] = MCMC_data
+  }
+  MCMC_data = tmp
+  outputs <- furrr::future_map(MCMC_data, MCMC_function)
   dat <- do.call(rbind, outputs)
   
   #---- Saving the chains
@@ -372,6 +379,7 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){
   Pa   <- stats::cov(dat[, iX])
   Pa[is.na(Pa)] <- 0
   mq <- dat[,  grep("q", colnames(dat))]  # Omega, Precision
+  if(is.null(dim(mq))) mq = matrix(mq,ncol=1)
   q.bar <- matrix(apply(mq, 2, mean),
                   length(elements.W.Data),
                   length(elements.W.Data)
@@ -420,34 +428,35 @@ GEF.MultiSite<-function(settings, Forecast, Observed, H, extraArg,...){
 
 ##' @title MCMC_function
 ##' @author Michael Dietze \email{dietze@@bu.edu}, Ann Raiho, Hamze Dokoohaki, and Dongchen Zhang.
-##' @param data list containing everything needed for the MCMC sampling.
+##' @param dat list containing everything needed for the MCMC sampling.
 ##' @details This function replace the previous code where implenmented the MCMC sampling part, which allows the MCMC sampling of multiple chains under parallel mode.
-MCMC_function <- function(data){
-  dimensions.tobit <- list(X = length(data$elements.W.Data),
-                           X.mod = ncol(data$X),
-                           Q = c(nrow(data$aqq), ncol(data$aqq))
+MCMC_function <- function(dat){
+  t = dat$t
+  dimensions.tobit <- list(X = length(dat$elements.W.Data),
+                           X.mod = ncol(dat$X),
+                           Q = c(nrow(dat$aqq), ncol(dat$aqq))
   )
   # Contants defined in the model
   constants.tobit <-
     list(
-      N = ncol(data$X),
-      YN = length(data$elements.W.Data),
-      nH = length(data$elements.W.Data),
-      H = data$elements.W.Data,
-      NotH = which(!(1:ncol(data$X) %in% data$elements.W.Data)),
-      nNotH = which(!(1:ncol(data$X) %in% data$elements.W.Data)) %>% length(),
-      q.type=data$q.type
+      N = ncol(dat$X),
+      YN = length(dat$elements.W.Data),
+      nH = length(dat$elements.W.Data),
+      H = dat$elements.W.Data,
+      NotH = which(!(1:ncol(dat$X) %in% dat$elements.W.Data)),
+      nNotH = which(!(1:ncol(dat$X) %in% dat$elements.W.Data)) %>% length(),
+      q.type=dat$q.type
     )
   # Data used for setting the likelihood and other stuff
   data.tobit <-
     list(
-      muf = as.vector(data$mu.f),
-      pf = data$Pf,
-      aq = data$aqq[,,t],
-      bq = data$bqq[t],
-      y.ind = data$y.ind,
-      y.censored = data$y.censored,
-      r = solve(data$R)
+      muf = as.vector(dat$mu.f),
+      pf = dat$Pf,
+      aq = dat$aqq[,,t],
+      bq = dat$bqq[t],
+      y.ind = dat$y.ind,
+      y.censored = dat$y.censored,
+      r = solve(dat$R)
     )
   if(constants.tobit$YN == 1){
     #add error message if trying to run SDA with 1 obs and 1 state variable no model currently exists to handle this case, need to remove for loop from GEF_singleobs_nimble for this case and save new model
@@ -462,11 +471,11 @@ MCMC_function <- function(data){
     constants.tobit$q.type <- NULL
     inits.pred <-
       list(
-        X.mod = as.vector(data$mu.f),
-        X = as.vector(data$mu.f)[data$elements.W.Data],
-        Xall = as.vector(data$mu.f),
-        Xs = as.vector(data$mu.f)[data$elements.W.Data],
-        q = diag(1, length(data$elements.W.Data), length(data$elements.W.Data))
+        X.mod = as.vector(dat$mu.f),
+        X = as.vector(dat$mu.f)[dat$elements.W.Data],
+        Xall = as.vector(dat$mu.f),
+        Xs = as.vector(dat$mu.f)[dat$elements.W.Data],
+        q = diag(1, length(dat$elements.W.Data), length(dat$elements.W.Data))
       )
     model_pred <- nimble::nimbleModel(GEF_singleobs_nimble,
                                       data = data.tobit,
@@ -483,16 +492,16 @@ MCMC_function <- function(data){
   }
   ## Adding X.mod,q,r as data for building model.
   conf <- nimble::configureMCMC(model_pred, print=TRUE)
-  conf$setMonitors(data$monitors) 
+  conf$setMonitors(dat$monitors) 
   samplerNumberOffset <- length(conf$getSamplers())
   
-  for(i in 1:length(data$y.ind)) {
+  for(i in 1:length(dat$y.ind)) {
     node <- paste0('y.censored[',i,']')
     conf$addSampler(node, 'toggle', control=list(type='RW'))
   }
   #handling samplers
   samplerLists <- conf$getSamplers()
-  samplerLists[[2]]$control <- list(propCov= data$Pf, adaptScaleOnly = TRUE, adaptive = TRUE)
+  samplerLists[[2]]$control <- list(propCov= dat$Pf, adaptScaleOnly = TRUE, adaptive = TRUE)
   conf$setSamplers(samplerLists)
   
   conf$printSamplers()
@@ -500,22 +509,22 @@ MCMC_function <- function(data){
   Cmodel <- nimble::compileNimble(model_pred)
   Cmcmc <- nimble::compileNimble(Rmcmc, project = model_pred, showCompilerOutput = TRUE)
   
-  for(i in 1:length(data$y.ind)) {
-    valueInCompiledNimbleFunction(Cmcmc$samplerFunctions[[samplerNumberOffset+i]], 'toggle', 1-data$y.ind[i])
+  for(i in 1:length(dat$y.ind)) {
+    valueInCompiledNimbleFunction(Cmcmc$samplerFunctions[[samplerNumberOffset+i]], 'toggle', 1-dat$y.ind[i])
   }
   inits <- function(){
-    ind <- sample(seq_along(1:nrow(data$X)), 1)
-    init_muf <- data$X[ind,]
+    ind <- sample(seq_along(1:nrow(dat$X)), 1)
+    init_muf <- dat$X[ind,]
     list(X.mod = as.vector(init_muf), 
-         X = as.vector(init_muf)[data$elements.W.Data], 
+         X = as.vector(init_muf)[dat$elements.W.Data], 
          Xall = as.vector(init_muf),
-         Xs = as.vector(init_muf)[data$elements.W.Data],
-         q = diag(1, length(data$elements.W.Data), length(data$elements.W.Data)))
+         Xs = as.vector(init_muf)[dat$elements.W.Data],
+         q = diag(1, length(dat$elements.W.Data), length(dat$elements.W.Data)))
   }
   if(exists("inits.pred")){
-    dat <- runMCMC(Cmcmc, niter = data$nitr.GEF, nburnin = data$nburnin, thin = data$nthin, nchains = 1)
+    dat <- runMCMC(Cmcmc, niter = dat$nitr.GEF, nburnin = dat$nburnin, thin = dat$nthin, nchains = 1)
   }else{
-    dat <- runMCMC(Cmcmc, niter = data$nitr.GEF, nburnin = data$nburnin, thin = data$nthin, nchains = 1, inits = inits)
+    dat <- runMCMC(Cmcmc, niter = dat$nitr.GEF, nburnin = dat$nburnin, thin = dat$nthin, nchains = 1, inits = inits)
   }
   return(dat)
-}
\ No newline at end of file
+}
diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R
index a1614aa93af..215b799fc37 100644
--- a/modules/assim.sequential/R/sda.enkf_MultiSite.R
+++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R
@@ -82,12 +82,7 @@ sda.enkf.multisite <- function(settings,
   
   forecast.time.step <- settings$state.data.assimilation$forecast.time.step  #idea for later generalizing
   nens       <- as.numeric(settings$ensemble$size)
-  processvar <- settings$state.data.assimilation$process.variance
-  if(processvar=="TRUE"){
-    processvar <- TRUE
-  }else{
-    processvar <- FALSE
-  }
+  processvar <- as.logical(settings$state.data.assimilation$process.variance)
   Localization.FUN <- settings$state.data.assimilation$Localization.FUN # localization function
   scalef <- settings$state.data.assimilation$scalef %>% as.numeric() # scale factor for localization
   var.names <- sapply(settings$state.data.assimilation$state.variable, '[[', "variable.name")
@@ -113,6 +108,7 @@ sda.enkf.multisite <- function(settings,
                         TRUE, 
                         settings$state.data.assimilation$censored.data %>% 
                           as.logical)
+  if(is.null(settings$state.data.assimilation$chains)) settings$state.data.assimilation$chains = 3
   #--------Initialization
   FORECAST    <- ANALYSIS <- ens_weights <- list()
   enkf.params <- list()
@@ -132,6 +128,11 @@ sda.enkf.multisite <- function(settings,
   if(multi.site.flag){
     conf.settings<-settings
     site.ids <- conf.settings$run %>% purrr::map('site') %>% purrr::map('id') %>% base::unlist() %>% base::as.character()
+    if(length(site.ids)==0){   ### these hacks should not be nessisary
+      for(s in seq_along(conf.settings$run)){
+        site.ids[s] = conf.settings[[s]]$run[[s]]$site$id
+      }
+    }
     # a matrix ready to be sent to spDistsN1 in sp package - first col is the long second is the lat and row names are the site ids
     site.locs <- conf.settings$run %>% purrr::map('site') %>% purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>% 
       t %>%
diff --git a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
index a0af1b23f4b..8395b60af95 100644
--- a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
+++ b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
@@ -3,7 +3,7 @@
 ###########  CONFIGURATION ###################
 
 ## original start date was 2022-05-17
-runDays <- seq(as.Date("2023-05-30"), as.Date("2023-06-13"), by="days")
+runDays <- seq(as.Date("2023-07-06"), as.Date("2023-07-08"), by="days")
 FORCE = FALSE  ## should we overwrite previously completed runs
 
 ## forecast configuration
@@ -93,7 +93,7 @@ minio_uri_public <- function(...) {
   sprintf(template, minio_path(...), minio_host, ":", minio_port)
 }
 
-runDays <- seq(as.Date("2022-11-25"), as.Date("2023-06-13"), by="days")
+runDays <- seq(as.Date("2023-06-22"), as.Date("2023-07-10"), by="days")
 
 ## loop over dates
 FORCE = FALSE
@@ -146,4 +146,4 @@ for (s in seq_along(runDays)) {
 ## list set of uploaded dates
 submitted = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% 
   dplyr::distinct(reference_datetime) %>% dplyr::collect() %>% as.vector()
-
+sort(submitted$reference_datetime)
diff --git a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
index 34eaceb7976..3cc2c17a6b5 100644
--- a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
+++ b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
@@ -1,4 +1,6 @@
+#!/usr/bin/env Rscript
 ## Forecast helper script around 05_SDA_Workflow_NA
+.libPaths(c("/projectnb/dietzelab/dietze/test-pecan/R/library",.libPaths()))
 
 ## forecast configuration
 projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/" ## main folder
diff --git a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
index 0a4e183e14a..d54acb9e19b 100755
--- a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
+++ b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
@@ -4,7 +4,9 @@
 # ----------------------------------------------------------------------
 #------------------------------------------ Load required libraries-----
 # ----------------------------------------------------------------------
+.libPaths(c("/projectnb/dietzelab/dietze/test-pecan/R/library",.libPaths()))
 library("PEcAn.all")
+library("PEcAn.settings")
 library("PEcAn.utils")
 library("PEcAn.data.remote")
 library("PEcAnAssimSequential")
@@ -58,7 +60,7 @@ projectdir = set$outdir
 # --------------------------------------------------------------------------------------------------
 
 #initialize obs.mean/cov NAs
-site.ids <- papply(set,function(x)(x$run$site$id)) %>% unlist() %>% as.character()
+site.ids <- PEcAn.settings::papply(set,function(x)(x$run$site$id)) %>% unlist() %>% as.character()
 nsite = length(site.ids)
 
 NAdata = data.frame(date = c(rep(start.date,nsite),rep(sda.start,nsite)),
@@ -130,16 +132,16 @@ set$pfts$pft$outdir = file.path(set$outdir,"pft")
 set$host$rundir <- set$rundir
 set$host$outdir <- set$modeloutdir
 set$host$folder <- set$modeloutdir
-dir.create(set$outdir)
-dir.create(set$rundir)
-dir.create(set$modeloutdir)
-dir.create(set$pfts$pft$outdir)
+dir.create(set$outdir,showWarnings = FALSE)
+dir.create(set$rundir,showWarnings = FALSE)
+dir.create(set$modeloutdir,showWarnings = FALSE)
+dir.create(set$pfts$pft$outdir,showWarnings = FALSE)
 
 #manually add in clim files 
 path = "/projectnb/dietzelab/ahelgeso/NOAA_met_data_CH1/noaa_clim/HARV/" ## hack
 met_paths <- list.files(path = file.path(path, start.date), full.names = TRUE, pattern = ".clim")
 #met_paths <- list.files(path = file.path(settings$run$settings.1$inputs$met$path, start.date), full.names = TRUE, pattern = ".clim")
-if(is_empty(met_paths)){
+if(purrr::is_empty(met_paths)){
   print(paste("SKIPPING: NO MET FOR",start.date))
   cat(as.character(start.date),sep="\n",file=file.path(dirname(set$outdir),"NO_MET"),append=TRUE) ## add to list of dates missing met
   stop_quietly()
@@ -159,7 +161,7 @@ rownames(run_id) <- NULL
 run_id = as.data.frame(run_id) %>% mutate(folder=prev_run_ids,id = paste0("id",.data$ens)) %>% group_by(site)
 ###settings$runs$id = run_id
 for(s in seq_along(set)){
-  site_run_id = run_id %>% filter(site == set[[s]]$run$site$id)
+  site_run_id = run_id |> filter(site == as.list(set$run[[s]]$site$id)[[1]])
   set[[s]]$run$id =  as.list(site_run_id$folder)
   names(set[[s]]$run$id) = site_run_id$id
 }
@@ -177,6 +179,8 @@ sda.enkf.multisite(settings = set,
                    restart = restart, 
                    forceRun = TRUE, 
                    keepNC = TRUE, 
+                   run_parallel = FALSE,
+                   parallel_qsub = FALSE,
                    control = list(trace = TRUE,
                                   FF = FALSE,
                                   interactivePlot = FALSE,
@@ -187,7 +191,8 @@ sda.enkf.multisite(settings = set,
                                   debug = FALSE,
                                   pause = FALSE,
                                   Profiling = FALSE,
-                                  OutlierDetection=FALSE))
+                                  OutlierDetection=FALSE,
+                                  free_run = FALSE))  ## seems to be defined twice
 
 
 
diff --git a/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd b/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd
index 62ee2efec99..7543c28444b 100644
--- a/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd
+++ b/modules/assim.sequential/inst/hf_landscape/06_cron.Rmd
@@ -10,18 +10,41 @@ library(cronR)
 
 ## Hello World
 ```{r}
-
+cmd <- cronR::cron_rscript("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/HelloWorld.R")
+cron_add(command = cmd, frequency = '*/5 * * * *', id = 'HelloWorld', description = "Every 5 min")
+cron_njobs()
+cron_ls()
+## wait for cron to run successfully then remove from queue
+cron_rm("HelloWorld")
 ```
 
 ## Met forecast (taking over for Alexis)
 ```{r}
+cmd <- cronR::cron_rscript("/home/dietze/pecan/scripts/HARV_metdownload_efi.R")
+cron_add(command = cmd, frequency = '0 */6 * * *', id = 'NOAA_GEFS', description = "NOAA GEFS forecasts via EFI server; update every 6 hr")
+cron_njobs()
+cron_ls()
 
 ```
 
+## manual catchup on met
+```{r}
+runDays <- seq(as.Date("2023-06-24"), as.Date("2023-07-09"), by="days")
+for (s in seq_along(runDays)) {
+  msg = system2(file.path(pecanhome,"scripts/HARV_metdownload_efi.R"),
+                paste("--start.date",runDays[s]),
+                wait=TRUE)#,
+             #   stdout="stdout.log",
+            #    stderr="stderr.log")
+  print(msg)
+}
+```
+
+
 
 ```{r}
 ### NO ASSIMILATION FORECAST
-cmd <- cronR::cron_rscript("~/pecan/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R")
+cmd <- cronR::cron_rscript("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R")
 cron_add(command = cmd, frequency = 'daily', at='4AM', id = 'HARV_FNA', description = "Harvard Forest landscape forecast, No Assimilated data")
 cron_njobs()
 cron_ls()
@@ -30,7 +53,7 @@ cron_ls()
 
 ## SMAP DATA DOWNLOAD
 ```{r}
-cmd <- cronR::cron_rscript("~/pecan/modules/assim.sequential/inst/hf_landscape/07_SMAP.R")
+cmd <- cronR::cron_rscript("/home/dietze/pecan/modules/assim.sequential/inst/hf_landscape/07_SMAP.R")
 cronR::cron_add(command = cmd, frequency = 'daily', at='11PM', id = 'SMAP', description = "SMAP download")
 cronR::cron_njobs()
 cronR::cron_ls()
diff --git a/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R b/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R
index 304e2ecd245..4bace0a4de4 100644
--- a/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R
+++ b/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R
@@ -85,9 +85,10 @@ PEcAn2EFI.ens <- function(outdir,run.id,start_date,end_date = NULL){
   PEcAn.logger::logger.info("reading ensemble output from run id: ", format(run.id, scientific = FALSE))
   ensemble.output <- PEcAn.utils::read.output(run.id, file.path(outdir, run.id), start.year, end.year, variables=NULL)
   
-  if(!is.numeric(nrow(ensemble.output))) return(NULL)
-  
+  if(length(ensemble.output)==0)  return(NULL)
+    
   ## reprocess time_bounds
+  if(length(dim(ensemble.output$time_bounds)<2)) ensemble.output$time_bounds = matrix(ensemble.output$time_bounds,nrow=2)
   doy = as.vector(ensemble.output$time_bounds[1,])  ## day of year
   years = c(which(doy < 0.00001),length(doy)+1) ## get breaks between years
   if(years[1] != 1) years = c(1,years) ## add current year if not start of year
diff --git a/modules/assim.sequential/man/MCMC_function.Rd b/modules/assim.sequential/man/MCMC_function.Rd
index 7f32ab3faf6..bce56e3ae5e 100644
--- a/modules/assim.sequential/man/MCMC_function.Rd
+++ b/modules/assim.sequential/man/MCMC_function.Rd
@@ -4,10 +4,10 @@
 \alias{MCMC_function}
 \title{MCMC_function}
 \usage{
-MCMC_function(data)
+MCMC_function(dat)
 }
 \arguments{
-\item{data}{list containing everything needed for the MCMC sampling.}
+\item{dat}{list containing everything needed for the MCMC sampling.}
 }
 \description{
 MCMC_function
diff --git a/modules/data.atmosphere/NAMESPACE b/modules/data.atmosphere/NAMESPACE
index 7183782887f..39e2937891b 100644
--- a/modules/data.atmosphere/NAMESPACE
+++ b/modules/data.atmosphere/NAMESPACE
@@ -36,6 +36,7 @@ export(download.NARR_site)
 export(download.NEONmet)
 export(download.NLDAS)
 export(download.NOAA_GEFS)
+export(download.NOAA_GEFS_EFI)
 export(download.PalEON)
 export(download.PalEON_ENS)
 export(download.US_WCr)
diff --git a/modules/data.atmosphere/R/download_noaa_gefs_efi.R b/modules/data.atmosphere/R/download_noaa_gefs_efi.R
index 0483b8c3331..c01c98007c8 100644
--- a/modules/data.atmosphere/R/download_noaa_gefs_efi.R
+++ b/modules/data.atmosphere/R/download_noaa_gefs_efi.R
@@ -1,4 +1,4 @@
-#' download_NOAA_GEFS_EFI
+#' download.NOAA_GEFS_EFI
 #'
 #' @param start_date start date for met forecast
 #' @param sitename NEON site name
@@ -10,8 +10,10 @@
 #'
 #'
 #' @author Alexis Helgeson
+#' @author Mike Dietze
+#' @export
 #' 
-download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, site.lon){
+download.NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, site.lon){
   #using the stage2 fcn mean that the met as already been downscaled and gapfilled to 1 hr intervals
   met = PEcAn.data.atmosphere::noaa_stage2(cycle = 0, 
                     version = "v12", 
@@ -20,9 +22,9 @@ download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, si
                     start_date = start_date)
   
   weather = met %>% 
-    dplyr::filter(.data$reference_datetime == as.POSIXct(start_date,tz="UTC"), sitename == sitename) %>%
+    dplyr::filter(.data$reference_datetime == as.POSIXct(start_date,tz="UTC"), site_id == sitename) %>%
     dplyr::collect() %>%
-    dplyr::select(.data$sitename, .data$prediction, .data$variable, .data$horizon, .data$parameter, .data$datetime)
+    dplyr::select(.data$site_id, .data$prediction, .data$variable, .data$horizon, .data$parameter, .data$datetime)
   
   PEcAn.logger::logger.info("Met Aquired for", sitename, "on", as.character(start_date))
   #grab/calculate timestep, this might not be necessary b/c of the datetime column?
@@ -55,7 +57,7 @@ download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, si
   #adding in windspeed and specific humidity
   cf_var_names1 <- c("surface_downwelling_longwave_flux_in_air", "surface_downwelling_shortwave_flux_in_air", "precipitation_flux", "air_pressure", "relative_humidity", "air_temperature", "specific_humidity", "wind_speed")
   cf_var_units1 <- c("Wm-2", "Wm-2", "kgm-2s-1", "Pa", "1", "K", "1", "ms-1")
-  #calculate specific humdity using realtive humidity (no unit conversion requied as relative humidity is in range 0-1), air temperature (no unit conversion already in K), and air pressure (no unit conversion already in Pa)
+  #calculate specific humdity using relative humidity (no unit conversion requied as relative humidity is in range 0-1), air temperature (no unit conversion already in K), and air pressure (no unit conversion already in Pa)
   specific_humidity <- rep(NA, length(noaa_data$relative_humidity$value))
   specific_humidity[which(!is.na(noaa_data$relative_humidity$value))] <- PEcAn.data.atmosphere::rh2qair(rh = noaa_data$relative_humidity$value[which(!is.na(noaa_data$relative_humidity$value))],
                                                                                                         T = noaa_data$air_temperature$value[which(!is.na(noaa_data$relative_humidity$value))],
diff --git a/modules/data.atmosphere/man/download_NOAA_GEFS_EFI.Rd b/modules/data.atmosphere/man/download.NOAA_GEFS_EFI.Rd
similarity index 70%
rename from modules/data.atmosphere/man/download_NOAA_GEFS_EFI.Rd
rename to modules/data.atmosphere/man/download.NOAA_GEFS_EFI.Rd
index e6f23a6699d..eed1d3b757d 100644
--- a/modules/data.atmosphere/man/download_NOAA_GEFS_EFI.Rd
+++ b/modules/data.atmosphere/man/download.NOAA_GEFS_EFI.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/download_noaa_gefs_efi.R
-\name{download_NOAA_GEFS_EFI}
-\alias{download_NOAA_GEFS_EFI}
-\title{download_NOAA_GEFS_EFI}
+\name{download.NOAA_GEFS_EFI}
+\alias{download.NOAA_GEFS_EFI}
+\title{download.NOAA_GEFS_EFI}
 \usage{
-download_NOAA_GEFS_EFI(sitename, outfolder, start_date, site.lat, site.lon)
+download.NOAA_GEFS_EFI(sitename, outfolder, start_date, site.lat, site.lon)
 }
 \arguments{
 \item{sitename}{NEON site name}
@@ -21,8 +21,10 @@ download_NOAA_GEFS_EFI(sitename, outfolder, start_date, site.lat, site.lon)
 message confirming download complete and location of .nc files
 }
 \description{
-download_NOAA_GEFS_EFI
+download.NOAA_GEFS_EFI
 }
 \author{
 Alexis Helgeson
+
+Mike Dietze
 }
diff --git a/scripts/HARV_metdownload_efi.R b/scripts/HARV_metdownload_efi.R
old mode 100644
new mode 100755
index abab73d3a39..54684ae6d09
--- a/scripts/HARV_metdownload_efi.R
+++ b/scripts/HARV_metdownload_efi.R
@@ -1,44 +1,64 @@
+#!/usr/bin/env Rscript
 #load libraries
+.libPaths(c("/projectnb/dietzelab/dietze/test-pecan/R/library",.libPaths()))
 library(dplyr)
-#load fcns
-source("/projectnb/dietzelab/ahelgeso/Forecast_Scripts/download_noaa_gefs_efi.R")
-source("/projectnb/dietzelab/ahelgeso/Forecast_Scripts/noaa_gefs_efi_helper.R")
+option_list = list(optparse::make_option("--start.date",
+                                         default = Sys.Date(),
+                                         type="character"),
+                   optparse::make_option("--jumpback",
+                                         default = 10,
+                                         type="integer")
+)
+args <- optparse::parse_args(optparse::OptionParser(option_list = option_list))
+if(is.null(args$jumpback)) args$jumpback = 10
+jumpback = args$jumpback
+
 #set fcn inputs
-startdate = format(Sys.Date(), "%Y-%m-%d")
+startdate = as.Date(args$start.date)
 nc_dir = "/projectnb/dietzelab/ahelgeso/NOAA_met_data/"
 site.lat = 42.5
 site.lon = -72.15
 sitename <- "HARV"
 siteid <- 646
 clim_dir <- "/projectnb/dietzelab/ahelgeso/NOAA_met_data_CH1/"
+runDays <- format(seq(from=as.Date(startdate - lubridate::days(jumpback)),
+               to=as.Date(as.character(startdate)),
+               by="days"),"%Y-%m-%d")
+print("args set")
 
-#download met using EFI fcns
-download_NOAA_GEFS_EFI(sitename = sitename,
-                       outfolder = nc_dir,
-                       start_date = startdate,
-                       site.lat = site.lat,
-                       site.lon = site.lon)
-#set up path for met2model
-output_path <- file.path("/projectnb/dietzelab/ahelgeso/NOAA_met_data/noaa/NOAAGEFS_1hr", sitename, startdate, "00")
-########## Met2Model For SIPNET ##############
-outfolder = file.path(clim_dir, "noaa_clim", sitename, startdate)
-if(!dir.exists(outfolder)){dir.create(outfolder, recursive = TRUE)}
-
-in.path = output_path
-in.prefix = list.files(output_path)
-
-end_date = as.Date(startdate) + lubridate::days(35)
-
-for(l in 1:length(in.prefix)){
+for(t in seq_along(runDays)){
+  print(runDays[t])
+  
+  #download met using EFI fcns
+  tmp = PEcAn.data.atmosphere:::download.NOAA_GEFS_EFI(sitename = sitename,
+                                                 outfolder = nc_dir,
+                                                 start_date = runDays[t],
+                                                 site.lat = site.lat,
+                                                 site.lon = site.lon)
+  #set up path for met2model
+  output_path <- file.path("/projectnb/dietzelab/ahelgeso/NOAA_met_data/noaa/NOAAGEFS_1hr", sitename, runDays[t], "00")
+  ########## Met2Model For SIPNET ##############
+  outfolder = file.path(clim_dir, "noaa_clim", sitename, runDays[t])
+  if(!dir.exists(outfolder)){dir.create(outfolder, recursive = TRUE)}
+  
+  in.path = output_path
+  in.prefix = list.files(output_path)
+  
+  end_date = as.Date(runDays[t]) + lubridate::days(35)
   
-  PEcAn.SIPNET::met2model.SIPNET(in.path = in.path, 
-                                 in.prefix = in.prefix[l], 
-                                 outfolder = outfolder, 
-                                 start_date = startdate, 
-                                 end_date = end_date,
-                                 overwrite = FALSE,
-                                 verbose = FALSE, 
-                                 year.fragment = TRUE) 
+  for(l in 1:length(in.prefix)){
+    
+    PEcAn.SIPNET::met2model.SIPNET(in.path = in.path, 
+                                   in.prefix = in.prefix[l], 
+                                   outfolder = outfolder, 
+                                   start_date = runDays[t], 
+                                   end_date = end_date,
+                                   overwrite = FALSE,
+                                   verbose = FALSE, 
+                                   year.fragment = TRUE) 
+    
+  } 
   
-} 
+}
+print("done")
 

From 5512ae7f4061a792c277eaa5167eaeac667339c4 Mon Sep 17 00:00:00 2001
From: Mike Dietze <dietze@bu.edu>
Date: Mon, 10 Jul 2023 15:27:16 -0400
Subject: [PATCH 3/7] fix run.write.configs to allow reuse of previous ensemble
 samples. Added so that parameters are not resampled site-by-site in a
 multi-site run.

---
 base/workflow/R/run.write.configs.R           | 108 ++++++++++--------
 base/workflow/R/runModule.run.write.configs.R |  12 +-
 base/workflow/man/run.write.configs.Rd        |   5 +-
 .../man/runModule.run.write.configs.Rd        |   8 +-
 4 files changed, 80 insertions(+), 53 deletions(-)

diff --git a/base/workflow/R/run.write.configs.R b/base/workflow/R/run.write.configs.R
index 70ed76f7f51..c7329858d63 100644
--- a/base/workflow/R/run.write.configs.R
+++ b/base/workflow/R/run.write.configs.R
@@ -11,6 +11,7 @@
 #' @param posterior.files Filenames for posteriors for drawing samples for ensemble and sensitivity
 #'    analysis (e.g. post.distns.Rdata, or prior.distns.Rdata)
 #' @param overwrite logical: Replace output files that already exist?
+#' @param use.existing.samples FALSE (default), TRUE, or filename
 #'
 #' @details The default value for \code{posterior.files} is NA, in which case the
 #'    most recent posterior or prior (in that order) for the workflow is used.
@@ -24,7 +25,7 @@
 #' @author David LeBauer, Shawn Serbin, Ryan Kelly, Mike Dietze
 run.write.configs <- function(settings, write = TRUE, ens.sample.method = "uniform", 
                               posterior.files = rep(NA, length(settings$pfts)), 
-                              overwrite = TRUE) {
+                              overwrite = TRUE,use.existing.samples=FALSE) {
   tryCatch({
     con <- PEcAn.DB::db.open(settings$database$bety)
     on.exit(PEcAn.DB::db.close(con), add = TRUE)
@@ -34,57 +35,68 @@ run.write.configs <- function(settings, write = TRUE, ens.sample.method = "unifo
       conditionMessage(e))
   })
   
-  ## Which posterior to use?
-  for (i in seq_along(settings$pfts)) {
-    ## if posterior.files is specified us that
-    if (is.na(posterior.files[i])) {
-      ## otherwise, check to see if posteriorid exists
-      if (!is.null(settings$pfts[[i]]$posteriorid)) {
-        #TODO: sometimes `files` is a 0x0 tibble and other operations with it fail.
-        files <- PEcAn.DB::dbfile.check("Posterior",
-                                        settings$pfts[[i]]$posteriorid, 
-                                        con, settings$host$name, return.all = TRUE)
-        pid <- grep("post.distns.*Rdata", files$file_name)  ## is there a posterior file?
-        if (length(pid) == 0) {
-          pid <- grep("prior.distns.Rdata", files$file_name)  ## is there a prior file?
-        }
-        if (length(pid) > 0) {
-          posterior.files[i] <- file.path(files$file_path[pid], files$file_name[pid])
-        }  ## otherwise leave posteriors as NA
-      }
-      ## otherwise leave NA and get.parameter.samples will look for local
-    } else {
-      ## does posterior.files point to a directory instead of a file?
-      if(utils::file_test("-d",posterior.files[i])){
-        pfiles = dir(posterior.files[i],pattern = "post.distns.*Rdata",full.names = TRUE)
-        if(length(pfiles)>1){
-          pid = grep("post.distns.Rdata",pfiles)
-          if(length(pid > 0)){
-            pfiles = pfiles[grep("post.distns.Rdata",pfiles)]
-          } else {
-            PEcAn.logger::logger.error(
-              "run.write.configs: could uniquely identify posterior files within",
-              posterior.files[i])
-          }
-          posterior.files[i] = pfiles
-        }
-      }
-      ## also, double check PFT outdir exists
-      if (is.null(settings$pfts[[i]]$outdir) || is.na(settings$pfts[[i]]$outdir)) {
-        ## no outdir
-        settings$pfts[[i]]$outdir <- file.path(settings$outdir, "pfts", settings$pfts[[i]]$name)
-      }
-    }  ## end else
-  } ## end for loop over pfts
-  
-  ## Sample parameters
   model <- settings$model$type
   scipen <- getOption("scipen")
   options(scipen = 12)
-
-  PEcAn.uncertainty::get.parameter.samples(settings, posterior.files, ens.sample.method)
-  load(file.path(settings$outdir, "samples.Rdata")) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples
   
+  ## Which posterior to use?
+  if(is.logical(use.existing.samples)){
+    if(use.existing.samples & file.exists(file.path(settings$outdir, "samples.Rdata"))){
+      load(file.path(settings$outdir, "samples.Rdata")) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples
+    } else {
+      for (i in seq_along(settings$pfts)) {
+        ## if posterior.files is specified us that
+        if (is.na(posterior.files[i])) {
+          ## otherwise, check to see if posteriorid exists
+          if (!is.null(settings$pfts[[i]]$posteriorid)) {
+            #TODO: sometimes `files` is a 0x0 tibble and other operations with it fail.
+            files <- PEcAn.DB::dbfile.check("Posterior",
+                                            settings$pfts[[i]]$posteriorid, 
+                                            con, settings$host$name, return.all = TRUE)
+            pid <- grep("post.distns.*Rdata", files$file_name)  ## is there a posterior file?
+            if (length(pid) == 0) {
+              pid <- grep("prior.distns.Rdata", files$file_name)  ## is there a prior file?
+            }
+            if (length(pid) > 0) {
+              posterior.files[i] <- file.path(files$file_path[pid], files$file_name[pid])
+            }  ## otherwise leave posteriors as NA
+          }
+          ## otherwise leave NA and get.parameter.samples will look for local
+        } else {
+          ## does posterior.files point to a directory instead of a file?
+          if(utils::file_test("-d",posterior.files[i])){
+            pfiles = dir(posterior.files[i],pattern = "post.distns.*Rdata",full.names = TRUE)
+            if(length(pfiles)>1){
+              pid = grep("post.distns.Rdata",pfiles)
+              if(length(pid > 0)){
+                pfiles = pfiles[grep("post.distns.Rdata",pfiles)]
+              } else {
+                PEcAn.logger::logger.error(
+                  "run.write.configs: could uniquely identify posterior files within",
+                  posterior.files[i])
+              }
+              posterior.files[i] = pfiles
+            }
+          }
+          ## also, double check PFT outdir exists
+          if (is.null(settings$pfts[[i]]$outdir) || is.na(settings$pfts[[i]]$outdir)) {
+            ## no outdir
+            settings$pfts[[i]]$outdir <- file.path(settings$outdir, "pfts", settings$pfts[[i]]$name)
+          }
+        }  ## end else
+      } ## end for loop over pfts
+      
+      ## Sample parameters
+      PEcAn.uncertainty::get.parameter.samples(settings, posterior.files, ens.sample.method)
+      load(file.path(settings$outdir, "samples.Rdata")) ## loads ensemble.samples, trait.samples, sa.samples, runs.samples, env.samples
+      
+      ## end use.existing.samples == FALSE
+    } ## is.logical(use.existing.samples)
+  } else {
+    ### use.existing.samples must be a filename
+    load(use.existing.samples)
+  }
+
   ## remove previous runs.txt
   if (overwrite && file.exists(file.path(settings$rundir, "runs.txt"))) {
     PEcAn.logger::logger.warn("Existing runs.txt file will be removed.")
diff --git a/base/workflow/R/runModule.run.write.configs.R b/base/workflow/R/runModule.run.write.configs.R
index 265da4f6c1c..1bddef506f1 100644
--- a/base/workflow/R/runModule.run.write.configs.R
+++ b/base/workflow/R/runModule.run.write.configs.R
@@ -2,16 +2,17 @@
 #'
 #' @param settings a PEcAn Settings or MultiSettings object
 #' @param overwrite logical: Replace config files if they already exist?
+#' @param use.existing.samples FALSE (default), TRUE, or filename
 #' @return A modified settings object, invisibly
 #' @export
-runModule.run.write.configs <- function(settings, overwrite = TRUE) {
+runModule.run.write.configs <- function(settings, overwrite = TRUE, use.existing.samples = TRUE) {
 
   if (PEcAn.settings::is.MultiSettings(settings)) {
     if (overwrite && file.exists(file.path(settings$rundir, "runs.txt"))) {
       PEcAn.logger::logger.warn("Existing runs.txt file will be removed.")
       unlink(file.path(settings$rundir, "runs.txt"))
     }
-    return(PEcAn.settings::papply(settings, runModule.run.write.configs, overwrite = FALSE))
+    return(PEcAn.settings::papply(settings, runModule.run.write.configs, overwrite = overwrite,use.existing.samples=use.existing.samples))
   } else if (PEcAn.settings::is.Settings(settings)) {
     write <- settings$database$bety$write
     # double check making sure we have method for parameter sampling
@@ -27,7 +28,12 @@ runModule.run.write.configs <- function(settings, overwrite = TRUE) {
       }) %>%
       unlist()
     
-    return(PEcAn.workflow::run.write.configs(settings, write, ens.sample.method, posterior.files = posterior.files, overwrite = overwrite))
+    return(PEcAn.workflow::run.write.configs(settings, 
+                                             write, 
+                                             ens.sample.method, 
+                                             posterior.files = posterior.files, 
+                                             overwrite = overwrite, 
+                                             use.existing.samples = use.existing.samples))
   } else {
     stop("runModule.run.write.configs only works with Settings or MultiSettings")
   }
diff --git a/base/workflow/man/run.write.configs.Rd b/base/workflow/man/run.write.configs.Rd
index b5be69a3287..0f88633d886 100644
--- a/base/workflow/man/run.write.configs.Rd
+++ b/base/workflow/man/run.write.configs.Rd
@@ -9,7 +9,8 @@ run.write.configs(
   write = TRUE,
   ens.sample.method = "uniform",
   posterior.files = rep(NA, length(settings$pfts)),
-  overwrite = TRUE
+  overwrite = TRUE,
+  use.existing.samples = FALSE
 )
 }
 \arguments{
@@ -23,6 +24,8 @@ run.write.configs(
 analysis (e.g. post.distns.Rdata, or prior.distns.Rdata)}
 
 \item{overwrite}{logical: Replace output files that already exist?}
+
+\item{use.existing.samples}{FALSE (default), TRUE, or filename}
 }
 \value{
 an updated settings list, which includes ensemble IDs for SA and ensemble analysis
diff --git a/base/workflow/man/runModule.run.write.configs.Rd b/base/workflow/man/runModule.run.write.configs.Rd
index fcb253947b3..859839b40b7 100644
--- a/base/workflow/man/runModule.run.write.configs.Rd
+++ b/base/workflow/man/runModule.run.write.configs.Rd
@@ -4,12 +4,18 @@
 \alias{runModule.run.write.configs}
 \title{Generate model-specific run configuration files for one or more PEcAn runs}
 \usage{
-runModule.run.write.configs(settings, overwrite = TRUE)
+runModule.run.write.configs(
+  settings,
+  overwrite = TRUE,
+  use.existing.samples = TRUE
+)
 }
 \arguments{
 \item{settings}{a PEcAn Settings or MultiSettings object}
 
 \item{overwrite}{logical: Replace config files if they already exist?}
+
+\item{use.existing.samples}{FALSE (default), TRUE, or filename}
 }
 \value{
 A modified settings object, invisibly

From c5180ceda737824ac27505a8c76bf068c497dae3 Mon Sep 17 00:00:00 2001
From: Mike Dietze <dietze@bu.edu>
Date: Fri, 14 Jul 2023 10:13:50 -0400
Subject: [PATCH 4/7] updates. trying to deal with keeping ensemble met
 together and get the 35 day forecast working.

---
 models/sipnet/R/model2netcdf.SIPNET.R         |  4 +-
 models/sipnet/man/model2netcdf.SIPNET.Rd      |  2 +-
 modules/assim.sequential/R/metSplit.R         | 43 ++++++++-
 .../assim.sequential/R/sda.enkf_MultiSite.R   | 39 ++++----
 .../hf_landscape/04_ForwardOnlyForecast.R     | 49 +++++++---
 .../05B_SDA_Workflow_NA.reanalysis.R          | 90 ++++++++++++++++---
 .../05C_SDA_Workflow_NA.forecast.R            | 18 ++--
 .../inst/hf_landscape/05_SDA_Workflow_NA.R    | 32 ++++---
 .../inst/hf_landscape/07_SMAP.R               |  4 +-
 modules/assim.sequential/man/metSplit.Rd      |  5 +-
 10 files changed, 215 insertions(+), 71 deletions(-)

diff --git a/models/sipnet/R/model2netcdf.SIPNET.R b/models/sipnet/R/model2netcdf.SIPNET.R
index 33ca6480dc7..f54e1d94944 100644
--- a/models/sipnet/R/model2netcdf.SIPNET.R
+++ b/models/sipnet/R/model2netcdf.SIPNET.R
@@ -103,7 +103,7 @@ sipnet2datetime <- function(sipnet_tval, base_year, base_month = 1,
 ##' @export
 ##' @author Shawn Serbin, Michael Dietze
 model2netcdf.SIPNET <- function(outdir, sitelat, sitelon, start_date, end_date, delete.raw = FALSE, revision, prefix = "sipnet.out",
-                                overwrite = FALSE, conflict = FALSE) {
+                                overwrite = FALSE, conflict = TRUE) {
 
   ### Read in model output in SIPNET format
   sipnet_out_file <- file.path(outdir, prefix)
@@ -323,4 +323,4 @@ model2netcdf.SIPNET <- function(outdir, sitelat, sitelon, start_date, end_date,
   }
 } # model2netcdf.SIPNET
 #--------------------------------------------------------------------------------------------------#
-### EOF
\ No newline at end of file
+### EOF
diff --git a/models/sipnet/man/model2netcdf.SIPNET.Rd b/models/sipnet/man/model2netcdf.SIPNET.Rd
index 3853c307e6a..697ec32d1bd 100644
--- a/models/sipnet/man/model2netcdf.SIPNET.Rd
+++ b/models/sipnet/man/model2netcdf.SIPNET.Rd
@@ -14,7 +14,7 @@ model2netcdf.SIPNET(
   revision,
   prefix = "sipnet.out",
   overwrite = FALSE,
-  conflict = FALSE
+  conflict = TRUE
 )
 }
 \arguments{
diff --git a/modules/assim.sequential/R/metSplit.R b/modules/assim.sequential/R/metSplit.R
index e48156c0ccc..0e692df6931 100644
--- a/modules/assim.sequential/R/metSplit.R
+++ b/modules/assim.sequential/R/metSplit.R
@@ -10,13 +10,14 @@
 #' @param nens number of ensemble members, taken from settings object
 #' @param restart_flag TRUE/FALSE taken from restart arguement
 #' @param my.split_inputs generated by sda.enkf_MultiSite ex. split_inputs.SIPNET
+#' @param outpath if specified, write output to a new directory. Default NULL writes back to the directory being read
 #' @export
 #'
 #' @return input.split object with split met filepaths
 #' 
 #' @author Alexis Helgeson
 #'
-metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, obs.times, t, nens, restart_flag = FALSE, my.split_inputs){
+metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, obs.times, t, nens, restart_flag = FALSE, my.split_inputs, outpath=NULL){
   
   #set start and end date for splitting met
   start.time = obs.times[t - 1] #always start timestep before
@@ -26,6 +27,17 @@ metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, o
   }else{
     stop.time = obs.times[t]
   }
+  
+  inputs.orig = inputs
+  nens.orig   = nens
+  if(!is.null(outpath)){
+    ## build a temporary inputs object based on original inputs in settings
+    for(i in seq_along(inputs)){
+      inputs[[i]]$samples = settings[[i]]$run$inputs$met$path
+    }
+    nens = length(settings[[i]]$run$inputs$met$path)
+  }
+  
   #-Splitting the input for the models that they don't care about the start and end time of simulations and they run as long as their met file.
   inputs.split <- 
     furrr::future_pmap(list(conf.settings %>% `class<-`(c("list")), inputs, model), function(settings, inputs, model) {
@@ -42,7 +54,9 @@ metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, o
               settings = settings,
               start.time = (lubridate::ymd_hms(start.time, truncated = 3) + lubridate::second(lubridate::hms("00:00:01"))),
               stop.time =   lubridate::ymd_hms(stop.time, truncated = 3),
-              inputs = inputs$samples[[i]])
+              inputs = inputs$samples[[i]],
+              outpath = file.path(outpath,settings$run$site$id)
+            )
           )
         }
       } else{
@@ -50,6 +64,29 @@ metSplit <- function(conf.settings, inputs, settings, model, no_split = FALSE, o
       }
       inputs.split
     })
-  
+    
+  ## match split original files back to resampled
+  if(!is.null(outpath)){
+    
+    new.inputs = inputs.orig
+    for(i in seq_along(inputs)){
+      new.inputs[[i]] = list()
+      parent.input = settings[[i]]$run$inputs$met$path ## set of parent met
+      for(e in seq_along(inputs.orig[[i]]$samples)){
+        ## extract prefix from old split ensemble member
+        old = sub(".clim","",basename(inputs.orig[[i]]$samples[[e]]),fixed=TRUE)
+        old = strsplit(old,split=".",fixed=TRUE)[[1]][1]
+        ## match ensemble member
+        ens = grep(old,unlist(parent.input)) 
+        ## swap
+        new.inputs[[i]]$samples[[e]] = inputs.split[[i]]$samples[[ens]]
+      }
+      names(new.inputs[[i]]$samples) = rep("path",nens.orig)
+    }
+    
+    inputs.split = new.inputs
+    
+ } ### end match
+
   return(inputs.split)
 }
diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R
index 215b799fc37..a0f02a29fee 100644
--- a/modules/assim.sequential/R/sda.enkf_MultiSite.R
+++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R
@@ -117,7 +117,7 @@ sda.enkf.multisite <- function(settings,
   if(!dir.exists(settings$outdir)) dir.create(settings$outdir, showWarnings = FALSE)
   
   ##### Creating matrices that describe the bounds of the state variables
-  ##### interval is remade everytime depending on the data at time t
+  ##### interval is remade every time depending on the data at time t
   ##### state.interval stays constant and converts new.analysis to be within the correct bounds
   interval    <- NULL
   state.interval <- cbind(as.numeric(lapply(settings$state.data.assimilation$state.variables,'[[','min_value')),
@@ -128,11 +128,6 @@ sda.enkf.multisite <- function(settings,
   if(multi.site.flag){
     conf.settings<-settings
     site.ids <- conf.settings$run %>% purrr::map('site') %>% purrr::map('id') %>% base::unlist() %>% base::as.character()
-    if(length(site.ids)==0){   ### these hacks should not be nessisary
-      for(s in seq_along(conf.settings$run)){
-        site.ids[s] = conf.settings[[s]]$run[[s]]$site$id
-      }
-    }
     # a matrix ready to be sent to spDistsN1 in sp package - first col is the long second is the lat and row names are the site ids
     site.locs <- conf.settings$run %>% purrr::map('site') %>% purrr::map_dfr(~c(.x[['lon']],.x[['lat']]) %>% as.numeric)%>% 
       t %>%
@@ -182,7 +177,7 @@ sda.enkf.multisite <- function(settings,
     }
   }
   obs.times <- obs.times.POSIX
-  read_restart_times <- c(lubridate::ymd_hms(start.cut, truncated = 3), obs.times)
+  read_restart_times <- c(lubridate::ymd_hms(start.cut, truncated = 3), obs.times) ## TODO: could cause an off-by-one error on iterative forecast if multiple obs times
   nt  <- length(obs.times) #sets length of for loop for Forecast/Analysis
   if (nt==0) PEcAn.logger::logger.severe('There has to be at least one Obs.')
 
@@ -217,33 +212,33 @@ sda.enkf.multisite <- function(settings,
   
   conf.settings <-conf.settings %>%
     `class<-`(c("list")) %>% #until here, it separates all the settings for all sites that listed in the xml file
-    furrr::future_map(function(settings) {
-      library(paste0("PEcAn.",settings$model$type), character.only = TRUE)#solved by including the model in the settings
+    furrr::future_map(function(my.settings) {
+      library(paste0("PEcAn.",my.settings$model$type), character.only = TRUE)#solved by including the model in the settings
       inputs.split <- list()
       if (!no_split) {
-        for (i in 1:length(settings$run$inputs$met$path)) {
+        for (i in 1:length(my.settings$run$inputs$met$path)) {
           #---------------- model specific split inputs
           ### model specific split inputs
-          settings$run$inputs$met$path[[i]] <- do.call(
+          my.settings$run$inputs$met$path[[i]] <- do.call(
             my.split_inputs,
             args = list(
-              settings = settings,
-              start.time = lubridate::ymd_hms(settings$run$site$met.start, truncated = 3), # This depends if we are restart or not
-              stop.time = lubridate::ymd_hms(settings$run$site$met.end, truncated = 3),
-              inputs =  settings$run$inputs$met$path[[i]],
-              outpath = paste0(paste0(settings$outdir, "/Extracted_met/"), settings$run$site$id),
-              overwrite =F
+              settings = my.settings,
+              start.time = lubridate::ymd_hms(my.settings$run$site$met.start, truncated = 3), # This depends if we are restart or not
+              stop.time = lubridate::ymd_hms(my.settings$run$site$met.end, truncated = 3),
+              inputs =  my.settings$run$inputs$met$path[[i]],
+              outpath = paste0(paste0(my.settings$outdir, "/Extracted_met/"), my.settings$run$site$id),
+              overwrite = FALSE 
             )
           )
           # changing the start and end date which will be used for model2netcdf.model
-          settings$run$start.date <- lubridate::ymd_hms(settings$state.data.assimilation$start.date, truncated = 3)
-          settings$run$end.date <- lubridate::ymd_hms(settings$state.data.assimilation$end.date, truncated = 3)
+          my.settings$run$start.date <- lubridate::ymd_hms(settings$state.data.assimilation$start.date, truncated = 3)
+          my.settings$run$end.date <- lubridate::ymd_hms(settings$state.data.assimilation$end.date, truncated = 3)
           
         }
       } else{
         inputs.split <- inputs
       }
-      settings
+      my.settings
     })
   conf.settings<- PEcAn.settings::as.MultiSettings(conf.settings)
   ###-------------------------------------------------------------------###
@@ -375,7 +370,9 @@ sda.enkf.multisite <- function(settings,
       if (t>1){
         #for next time step split the met if model requires
         #-Splitting the input for the models that they don't care about the start and end time of simulations and they run as long as their met file.
-        inputs.split <- metSplit(conf.settings, inputs, settings, model, no_split = FALSE, obs.times, t, nens, restart_flag = FALSE, my.split_inputs)
+        inputs.split <- metSplit(conf.settings, inputs, settings, model, no_split = FALSE, 
+                                 obs.times, t, nens, restart_flag = FALSE, my.split_inputs,
+                                 outpath = paste0(settings$outdir, "/Extracted_met/"))
         
         #---------------- setting up the restart argument for each site separately and keeping them in a list
         restart.list <-
diff --git a/modules/assim.sequential/inst/hf_landscape/04_ForwardOnlyForecast.R b/modules/assim.sequential/inst/hf_landscape/04_ForwardOnlyForecast.R
index 965d488d1bb..b9b63f05ce3 100644
--- a/modules/assim.sequential/inst/hf_landscape/04_ForwardOnlyForecast.R
+++ b/modules/assim.sequential/inst/hf_landscape/04_ForwardOnlyForecast.R
@@ -6,11 +6,35 @@ option_list = list(optparse::make_option("--start.date",
                       default = Sys.Date(),
                       type="character"))
 args <- optparse::parse_args(optparse::OptionParser(option_list = option_list))
-#args$start.date = "2022-05-17"
+#args$start.date = "2021-09-25"
 start.date = lubridate::as_date(args$start.date)
   
 #### grab & update default settings ####
-set = readRDS("/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/pecan.RDS")
+set = readRDS("/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pecan.RDS")
+if(FALSE){
+  ## one time updates
+  projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03"
+  set$outdir = projectdir
+  set$ensemble$size = 100  ## increase ensemble size
+  set$pfts[[1]]$posterior.files = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pfts/PFT_Files_new/temperate/post.distns.Rdata"
+  set$pfts[[2]]$posterior.files = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pfts/PFT_Files_new/conifer/post.distns.Rdata"
+  set$pfts[[3]]$posterior.files = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pfts/PFT_Files_new/grass/post.distns.Rdata"
+  set$pfts[[4]]$posterior.files = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pfts/trait.mcmc.pda.soil.HPDA_som_respiration_rate_correction.Rdata"
+  ## add soil pft to all sites
+  soil.pft = grep(pattern = "soil",x=unlist(sapply(set$pfts,function(x){x$name})))
+  for(i in seq_along(set)){
+    set[[i]]$run$site$site.pft[[2]] = set$pfts[[soil.pft]]$name
+    names(set[[i]]$run$site$site.pft)[2] = "pft.name"
+  }
+  ## initial conditions
+  for(i in seq_along(set)){
+    set[[i]]$run$inputs$poolinitcond = list()
+    set[[i]]$run$inputs$poolinitcond$path = paste0("/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/IC/IC_site_1-269",40+i,"_1.nc")
+    set[[i]]$run$inputs$pft.site = file.path(projectdir,"site_pft.csv")
+  }
+  saveRDS(set,file = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/pecan.RDS")
+}
+
 ##start.date
 for(s in seq_along(set)){
   set[[s]]$run$start.date = start.date
@@ -48,27 +72,28 @@ dir.create(set$rundir)
 dir.create(set$modeloutdir)
 dir.create(set$pfts$pft$outdir)
 ## job.sh
-set$model$jobtemplate = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/template.job"
+set$model$jobtemplate = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/template.job"
 
 
-set <- PEcAn.settings::prepare.settings(set, force = FALSE)
+#set <- PEcAn.settings::prepare.settings(set, force = FALSE)
+
 
-## add soil pft to all sites
-soil.pft = grep(pattern = "soil",x=unlist(sapply(set$pfts,function(x){x$name})))
-for(i in seq_along(set)){
-  set$run[[i]]$site$site.pft[[2]] = set$pfts[[soil.pft]]$name
-  names(set$run[[i]]$site$site.pft)[2] = "pft.name"
-}
 
 ## check PFTs
 pft.names = unlist(sapply(set$pfts,function(x){x$name}))
 set$pfts[[which(is.na(pft.names))]] = NULL
 
 ## run workflow
-set <- PEcAn.workflow::runModule.run.write.configs(set)
+set <- PEcAn.workflow::runModule.run.write.configs(set,use.existing.samples=TRUE)
 PEcAn.workflow::runModule_start_model_runs(set, stop.on.error = FALSE)
 
 ## future work
 ## * Integrate in Phyllis's changes to be able to save and reuse previous ensemble draws
 ## * restart from previous forecast
-## not sure why need to delete last line from met to get things to run.
\ No newline at end of file
+## not sure why need to delete last line from met to get things to run.
+
+## manual hack to recover runs.txt
+library(stringr)
+runs = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/FOF2021-09-25/run/runs.txt"
+runset = paste0("ENS-",str_pad(rep(1:100,times = 10), 5, pad = "0"),"-10000269",rep(41:50,each=100))
+write(runset,file=runs)
diff --git a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
index 8395b60af95..b7a4a68c0ad 100644
--- a/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
+++ b/modules/assim.sequential/inst/hf_landscape/05B_SDA_Workflow_NA.reanalysis.R
@@ -3,11 +3,12 @@
 ###########  CONFIGURATION ###################
 
 ## original start date was 2022-05-17
-runDays <- seq(as.Date("2023-07-06"), as.Date("2023-07-08"), by="days")
+runDays <- seq(as.Date("2021-09-26"), as.Date("2023-07-10"), by="days")
 FORCE = FALSE  ## should we overwrite previously completed runs
+GEN_FIGURES = FALSE ## should we generate figures?
 
 ## forecast configuration
-projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/" ## main folder
+projectdir = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/" ## main folder
 set = readRDS(file.path(projectdir,"pecan.RDS"))
 pecanhome = "/home/dietze/pecan"  ## directory where pecan is installed 
 
@@ -18,7 +19,7 @@ source(file.path(pecanhome,"modules/assim.sequential/inst/hf_landscape/minio_sec
 #minio_secret <- Sys.getenv("MINIO_SECRET_KEY", "password")
 minio_host <- Sys.getenv("MINIO_HOST", "test-pecan.bu.edu")
 minio_port <- Sys.getenv("MINIO_PORT", "9000")
-minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-none")
+minio_arrow_bucket <- Sys.getenv("MINIO_ARROW_BUCKET", "hf-landscape-v2")
 
 ################# Initial configuration (one time): ############################
 ##  * update local paths (uncomment, run once, recomment)
@@ -56,7 +57,7 @@ for (s in seq_along(runDays)) {
     ## is there output there?
     prev.out = dir(file.path(prev,"out"),full.names = TRUE)
     if(length(prev.out)>0){
-      prev.files = sapply(as.list(prev.out),function(x){length(dir(x,pattern = "*.nc"))})
+      prev.files = sapply(as.list(prev.out),function(x){length(dir(x,pattern = "*.nc$"))})
       if(min(prev.files)>0){
         
         #########   RUN FORECAST   ########
@@ -76,7 +77,70 @@ for (s in seq_along(runDays)) {
     break
   }
   
-}
+  ##########################################
+  ##  Ensure Sipnet.out files are merged  ##
+  ##########################################
+  this.out = dir(file.path(now,"out"),full.names = TRUE)
+  for(i in seq_along(this.out)){
+    #this.out[i] = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test03/tmp"
+    ### testing specific hack ###
+    foo = dir(this.out[i],pattern = "foo.nc",full.names = TRUE)
+    unlink(foo)
+    ## get files
+    ncf = dir(this.out[i],pattern = "*.nc$",full.names = TRUE)
+    out = dir(this.out[i],pattern = "*.out$",full.names = TRUE)
+    ## CASE 1: files weren't merged properly
+    if(length(out)>1 & length(ncf) > 0 ){ 
+      ## grab sipnet output
+      raw = list()
+      for(j in seq_along(out)){
+        raw[[j]] = read.delim(out[j], header = T, skip = 1, sep = "")
+        #if(length(out) > 1 & j == 1) raw[[j]][["day"]] = raw[[j]][["day"]] - 1 ## one-time testing hack
+      }
+      raw = dplyr::bind_rows(raw)
+      tvals <- raw[["day"]] + raw[["time"]] / 24
+      tdate = tvals + raw[["year"]]*1000 ## composite value for sorting, not a correct datetime
+      raw = raw[order(tdate),]
+      y = min(raw[["year"]])
+      dates <- PEcAn.SIPNET::sipnet2datetime(tvals,base_year = y)
+      ## grab netcdf
+      nc = ncdf4::nc_open(ncf)
+      ndates = ncdf4::ncvar_get(nc,varid = "time")
+      ncdf4::nc_close(nc)
+      if(min(tvals) == min(ndates) & max(tvals) == max(ndates)) next   ## check if date range matches
+      ## if not, clean up and try again
+      unlink(out)
+      unlink(ncf)
+      write.table("header",file=out[length(out)],col.names = F,row.names = F)
+      write.table(raw,file=out[length(out)],col.names = TRUE,row.names = FALSE,append = TRUE)
+      #out2 = read.delim(out[length(out)], header = T, skip = 1, sep = "") ## check that things match
+      PEcAn.SIPNET::model2netcdf.SIPNET(outdir = this.out[i],
+                                      sitelat=set[[1]]$run$site$lat,   ## works for HF but need to generalize this hard coding to match site
+                                      sitelon=set[[1]]$run$site$lat,
+                                      start_date = min(dates),
+                                      end_date   = max(dates),
+                                      revision   = "ssr"
+                                      )
+    } else {
+      ## CASE 1: single output, not converted
+      if(length(out) == 1 & length(ncf) == 0){
+        PEcAn.SIPNET::model2netcdf.SIPNET(outdir = this.out[i],
+                                          sitelat=set[[1]]$run$site$lat,   ## works for HF but need to generalize this hard coding to match site
+                                          sitelon=set[[1]]$run$site$lat,
+                                          start_date = runDays[s],
+                                          end_date   = as.Date(runDays[s]) + lubridate::days(35),
+                                          revision   = "ssr",
+                                          delete.raw = FALSE,
+                                          conflict   = TRUE
+                                          )
+      }
+    }
+  } ## end loop over files
+
+} ## end loop over dates
+
+
+
 
 ##########################################
 ## push output to minio in EFI standard ##
@@ -93,7 +157,7 @@ minio_uri_public <- function(...) {
   sprintf(template, minio_path(...), minio_host, ":", minio_port)
 }
 
-runDays <- seq(as.Date("2023-06-22"), as.Date("2023-07-10"), by="days")
+#runDays <- seq(as.Date("2023-06-22"), as.Date("2023-07-10"), by="days")
 
 ## loop over dates
 FORCE = FALSE
@@ -131,11 +195,17 @@ for (s in seq_along(runDays)) {
                                    start_date = runDays[s])
   }
   out = dplyr::bind_rows(out)
+  if(GEN_FIGURES){
+    ylim = range(out$GPP,na.rm=TRUE)
+    time = sort(unique(out$time_bounds))
+    plot(out$time_bounds,out$GPP,ylim=ylim,pch=".") ## GPP is UTC, has wrong shape, and is only one day
+  }
   if(!is.numeric(nrow(out)) | nrow(out) == 0) next  ## don't insert empty days into minio
-  out = out %>% relocate(parameter) %>% 
-    relocate(site_id) %>%
-    relocate(time_bounds) %>% rename(datetime=time_bounds) %>%
-    relocate(reference_datetime)
+  out = out %>% dplyr::relocate(parameter) %>% 
+    dplyr::relocate(site_id) %>%
+    dplyr::relocate(time_bounds) %>% 
+    dplyr::rename(datetime=time_bounds) %>%
+    dplyr::relocate(reference_datetime)
   out = tidyr::pivot_longer(out,5:ncol(out),names_to = "variable",values_to = "prediction")
 
   ## push to container in parquet format
diff --git a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
index 3cc2c17a6b5..1cf4d6f90d4 100644
--- a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
+++ b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
@@ -117,9 +117,10 @@ for (s in seq_along(runDays)) {
   
   ## did we write this run to minio already?
   if(!FORCE){  ## if not overwriting
-    ens = arrow::open_dataset(minio_uri_public(), format = "parquet" ) %>% 
-      dplyr::filter(lubridate::as_datetime(reference_datetime) == runDays[s]) %>%  
-      dplyr::distinct(parameter) %>% dplyr::collect()
+    ens = arrow::open_dataset(minio_uri_public(), format = "parquet" ) |> 
+      dplyr::filter(lubridate::as_datetime(reference_datetime) == runDays[s]) |>  
+      dplyr::distinct(parameter) |>
+      dplyr::collect()
     if(length(ens$parameter)>0) {
       print(paste("skipping",length(ens$parameter)))
       next
@@ -140,14 +141,15 @@ for (s in seq_along(runDays)) {
   }
   out = dplyr::bind_rows(out)
   if(!is.numeric(nrow(out)) | nrow(out) == 0) next  ## don't insert empty days into minio
-  out = out %>% relocate(parameter) %>% 
-    relocate(site_id) %>%
-    relocate(time_bounds) %>% rename(datetime=time_bounds) %>%
-    relocate(reference_datetime)
+  out = out %>% dplyr::relocate(parameter) |> 
+    dplyr::relocate(site_id) |>
+    dplyr::relocate(time_bounds) |> 
+    dplyr::rename(datetime=time_bounds) |>
+    dplyr::relocate(reference_datetime)
   out = tidyr::pivot_longer(out,5:ncol(out),names_to = "variable",values_to = "prediction")
   
   ## push to container in parquet format
-  out %>% dplyr::group_by(reference_datetime) %>% arrow::write_dataset(minio_uri(),format="parquet")
+  out |> dplyr::group_by(reference_datetime) |> arrow::write_dataset(minio_uri(),format="parquet")
   
 }
 
diff --git a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
index d54acb9e19b..01fa1dc7272 100755
--- a/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
+++ b/modules/assim.sequential/inst/hf_landscape/05_SDA_Workflow_NA.R
@@ -41,7 +41,11 @@ option_list = list(optparse::make_option("--start.date",
 args <- optparse::parse_args(optparse::OptionParser(option_list = option_list))
 #args$start.date = "2022-05-18 00:00:00"
 #args$prev = "/projectnb/dietzelab/dietze/hf_landscape_SDA/test02/FOF2022-05-17/"
+
+## set dates
 start.date = lubridate::as_date(args$start.date)
+prev.date  = basename(args$prev) |> substr(start=4,stop=13) |> lubridate::as_date()
+end.date <- start.date + lubridate::days(35)
 
 #------------------------------------------------------------------------------------------------
 #------------------------------------------ Preparing the pecan xml -----------------------------
@@ -49,21 +53,22 @@ start.date = lubridate::as_date(args$start.date)
 restart <- list()
 restart$filepath <- args$prev
 set = readRDS(args$settings)
-
-#set met.start & met.end
-end.date <- start.date + lubridate::days(35)
-sda.start = start.date + lubridate::days(1)
 projectdir = set$outdir
 
 # --------------------------------------------------------------------------------------------------
 #---------------------------------------------- NA DATA -------------------------------------
 # --------------------------------------------------------------------------------------------------
 
-#initialize obs.mean/cov NAs
+#initialize obs.mean/cov NAs & dates
+# note:
+#   for t=1: SDA doesn’t use metSplit, uses met.start, met.end
+#            for iterative forecast restart, this is PREV
+#   for t>1: metSplit uses obs.times t-1 -> t
 site.ids <- PEcAn.settings::papply(set,function(x)(x$run$site$id)) %>% unlist() %>% as.character()
 nsite = length(site.ids)
 
-NAdata = data.frame(date = c(rep(start.date,nsite),rep(sda.start,nsite)),
+NAdata = data.frame(date = c(rep(format(start.date, "%Y-%m-%d %H:%M:%S", tz = "GMT"),nsite),
+                             rep(format(end.date, "%Y-%m-%d %H:%M:%S", tz = "GMT"),nsite)), ##coerce to datetimes to avoid pumpkin rule
                     site_id = rep(site.ids,times=2),
                     data = rep(NA,nsite*2))
 obs.mean <- obs.cov <- split(NAdata, NAdata$date)
@@ -95,7 +100,7 @@ obs.cov <- purrr::map(
 ) %>% stats::setNames(date.obs)
 
 #add start.cut to restart list
-restart$start.cut <- start.date
+restart$start.cut <- as_datetime(start.date)
 restart$start.cut <- format(restart$start.cut, "%Y-%m-%d %H:%M:%S", tz = "GMT")
 
 
@@ -106,8 +111,8 @@ restart$start.cut <- format(restart$start.cut, "%Y-%m-%d %H:%M:%S", tz = "GMT")
 for(s in seq_along(set)){
   set[[s]]$run$start.date = start.date
   set[[s]]$run$end.date   = end.date
-  set[[s]]$run$site$met.start = start.date
-  set[[s]]$run$site$met.end   = end.date
+  set[[s]]$run$site$met.start = prev.date  ## time period for t == 1, prev forecast
+  set[[s]]$run$site$met.end   = start.date   
 }
 
 # Setting dates in assimilation tags - This will help with preprocess split in SDA code
@@ -143,7 +148,8 @@ met_paths <- list.files(path = file.path(path, start.date), full.names = TRUE, p
 #met_paths <- list.files(path = file.path(settings$run$settings.1$inputs$met$path, start.date), full.names = TRUE, pattern = ".clim")
 if(purrr::is_empty(met_paths)){
   print(paste("SKIPPING: NO MET FOR",start.date))
-  cat(as.character(start.date),sep="\n",file=file.path(dirname(set$outdir),"NO_MET"),append=TRUE) ## add to list of dates missing met
+##  cat(as.character(start.date),sep="\n",file=file.path(dirname(set$outdir),"NO_MET"),append=TRUE) ## add to list of dates missing met
+## TODO: for forecast, don't update NO_MET as files may still arrive; only update for reanalysis
   stop_quietly()
 }
 met_paths = as.list(met_paths)
@@ -158,10 +164,12 @@ prev_run_ids = list.files(file.path(restart$filepath, "out"))
 run_id = as.data.frame(strsplit(prev_run_ids,"-")) %>% t()
 colnames(run_id) <- c("pre","ens","site")
 rownames(run_id) <- NULL
-run_id = as.data.frame(run_id) %>% mutate(folder=prev_run_ids,id = paste0("id",.data$ens)) %>% group_by(site)
+run_id = as.data.frame(run_id) %>% 
+  dplyr::mutate(folder=prev_run_ids,id = paste0("id",.data$ens)) %>% 
+  dplyr::group_by(site)
 ###settings$runs$id = run_id
 for(s in seq_along(set)){
-  site_run_id = run_id |> filter(site == as.list(set$run[[s]]$site$id)[[1]])
+  site_run_id = run_id |> dplyr::filter(site == as.list(set[[s]]$run$site$id)[[1]])
   set[[s]]$run$id =  as.list(site_run_id$folder)
   names(set[[s]]$run$id) = site_run_id$id
 }
diff --git a/modules/assim.sequential/inst/hf_landscape/07_SMAP.R b/modules/assim.sequential/inst/hf_landscape/07_SMAP.R
index 4cd9c3c2841..67f48d786f1 100644
--- a/modules/assim.sequential/inst/hf_landscape/07_SMAP.R
+++ b/modules/assim.sequential/inst/hf_landscape/07_SMAP.R
@@ -1,4 +1,6 @@
+#!/usr/bin/env Rscript
 ## SMAP data prep cron script
+.libPaths(c("/projectnb/dietzelab/dietze/test-pecan/R/library",.libPaths()))
 
 ####  Libraries and configs
 #BiocManager::install("rhdf5")
@@ -184,7 +186,7 @@ for(t in seq_along(status$date)){
   
   ## determine what sites have already been processed
   SMAP_done = SMAP_CSV[SMAP_CSV$site_id %in% site.id & SMAP_CSV$date == status$date[t],]
-  if(nrow(SMAP_done) > 0){
+  if(nrow(SMAP_done) > 0 & !all(is.na(SMAP_done))){
     site.todo = !(site.id %in% SMAP_done$site_id)
     if(all(!site.todo)){ next } ## done, skip to the next year
   } else {
diff --git a/modules/assim.sequential/man/metSplit.Rd b/modules/assim.sequential/man/metSplit.Rd
index 63758ae5ddd..18c99376787 100644
--- a/modules/assim.sequential/man/metSplit.Rd
+++ b/modules/assim.sequential/man/metSplit.Rd
@@ -14,7 +14,8 @@ metSplit(
   t,
   nens,
   restart_flag = FALSE,
-  my.split_inputs
+  my.split_inputs,
+  outpath = NULL
 )
 }
 \arguments{
@@ -37,6 +38,8 @@ metSplit(
 \item{restart_flag}{TRUE/FALSE taken from restart arguement}
 
 \item{my.split_inputs}{generated by sda.enkf_MultiSite ex. split_inputs.SIPNET}
+
+\item{outpath}{if specified, write output to a new directory. Default NULL writes back to the directory being read}
 }
 \value{
 input.split object with split met filepaths

From 44e58ee3333198bd4942dc0a2c36c5b020550b41 Mon Sep 17 00:00:00 2001
From: Mike Dietze <dietze@bu.edu>
Date: Wed, 26 Jul 2023 16:09:38 -0400
Subject: [PATCH 5/7] forecast: check on sipnet -> nc conversion; SDA: tweaks
 on how parameter ensembles are set and updated.

---
 .../assim.sequential/R/sda.enkf_MultiSite.R   | 44 +++++++++----------
 .../05C_SDA_Workflow_NA.forecast.R            | 21 +++++++++
 2 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R
index a0f02a29fee..daf07d66f75 100644
--- a/modules/assim.sequential/R/sda.enkf_MultiSite.R
+++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R
@@ -334,29 +334,29 @@ sda.enkf.multisite <- function(settings,
   # weight matrix
   wt.mat <- matrix(NA, nrow = nens, ncol = nt)
   # Reading param samples------------------------------- 
-    #create params object using samples generated from TRAITS functions
-    if(restart_flag){
-      new.params <- new.params
-    }else{
-      if(!file.exists(file.path(settings$outdir, "samples.Rdata"))) PEcAn.logger::logger.severe("samples.Rdata cannot be found. Make sure you generate samples by running the get.parameter.samples function before running SDA.")
-      #Generate parameter needs to be run before this to generate the samples. This is hopefully done in the main workflow.
-      if(is.null(ensemble.samples)){
-        load(file.path(settings$outdir, "samples.Rdata"))
-      }
-      #reformatting params
-      new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens)
+  #create params object using samples generated from TRAITS functions
+  if(restart_flag){
+    new.params <- new.params
+  }else{
+    if(!file.exists(file.path(settings$outdir, "samples.Rdata"))) PEcAn.logger::logger.severe("samples.Rdata cannot be found. Make sure you generate samples by running the get.parameter.samples function before running SDA.")
+    #Generate parameter needs to be run before this to generate the samples. This is hopefully done in the main workflow.
+    if(is.null(ensemble.samples)){
+      load(file.path(settings$outdir, "samples.Rdata"))
     }
-      #sample met ensemble members
-      #TODO: incorporate Phyllis's restart work
-      #      sample all inputs specified in the settings$ensemble not just met
-      inputs <- PEcAn.settings::papply(conf.settings,function(setting) {
-        PEcAn.uncertainty::input.ens.gen(
-          settings = setting,
-          input = "met",
-          method = setting$ensemble$samplingspace$met$method,
-          parent_ids = NULL 
-        )
-       })
+    #reformatting params
+    new.params <- sda_matchparam(settings, ensemble.samples, site.ids, nens)
+  }
+  #sample met ensemble members
+  #TODO: incorporate Phyllis's restart work
+  #      sample all inputs specified in the settings$ensemble not just met
+  inputs <- PEcAn.settings::papply(conf.settings,function(setting) {
+    PEcAn.uncertainty::input.ens.gen(
+      settings = setting,
+      input = "met",
+      method = setting$ensemble$samplingspace$met$method,
+      parent_ids = NULL 
+    )
+  })
   ###------------------------------------------------------------------------------------------------###
   ### loop over time                                                                                 ###
   ###------------------------------------------------------------------------------------------------###
diff --git a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
index 1cf4d6f90d4..6d3fdbe5ef8 100644
--- a/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
+++ b/modules/assim.sequential/inst/hf_landscape/05C_SDA_Workflow_NA.forecast.R
@@ -78,6 +78,27 @@ for (s in seq_along(runDays)) {
                       stderr="stderr.log")
         print(msg)
         
+        
+        ## check that model2netcdf was run
+        this.out = dir(file.path(now,"out"),full.names = TRUE)
+        for(i in seq_along(this.out)){
+          ## get files
+          ncf = dir(this.out[i],pattern = "*.nc$",full.names = TRUE)
+          out = dir(this.out[i],pattern = "*.out$",full.names = TRUE)
+          if(length(out) >= 1 & length(ncf) == 0){
+              PEcAn.SIPNET::model2netcdf.SIPNET(outdir = this.out[i],
+                                                sitelat=set[[1]]$run$site$lat,   ## works for HF but need to generalize this hard coding to match site
+                                                sitelon=set[[1]]$run$site$lat,
+                                                start_date = runDays[s],
+                                                end_date   = as.Date(runDays[s]) + lubridate::days(35),
+                                                revision   = "ssr",
+                                                delete.raw = FALSE,
+                                                conflict   = TRUE
+              )
+          }
+        }
+        
+        
       }
     } else { break }
   } else {

From 27ba92d532df8da65404bc7d61f25cd6aa171c65 Mon Sep 17 00:00:00 2001
From: Mike Dietze <dietze@bu.edu>
Date: Wed, 26 Jul 2023 16:16:07 -0400
Subject: [PATCH 6/7] switch to native pipes

---
 modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R b/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R
index 4bace0a4de4..f8d9175e7c4 100644
--- a/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R
+++ b/modules/assim.sequential/inst/hf_landscape/PEcAn2EFI.R
@@ -63,7 +63,7 @@ PEcAn2EFI <- function(outdir = NULL, ne = NULL, site_id = NULL, start_date=NULL,
     ensemble.output[[row]]$time_bounds = tod
     
     ## convert to data frame
-    ensemble.output[[row]]  = as.data.frame(ensemble.output[[row]]) %>% 
+    ensemble.output[[row]]  = as.data.frame(ensemble.output[[row]]) |> 
       dplyr::mutate(
         parameter = row,
         reference_datetime = lubridate::as_datetime(start_date),
@@ -99,7 +99,7 @@ PEcAn2EFI.ens <- function(outdir,run.id,start_date,end_date = NULL){
   ensemble.output$time_bounds = tod
   
   ## convert to data frame
-  ensemble.output  = as.data.frame(ensemble.output) %>% 
+  ensemble.output  = as.data.frame(ensemble.output) |> 
     dplyr::mutate(
       parameter = ens.id,
       reference_datetime = lubridate::as_datetime(start_date),

From 7c39df4e9f8cc037dcfa90e3d7a2736f5770e7ff Mon Sep 17 00:00:00 2001
From: Michael Dietze <dietze@bu.edu>
Date: Wed, 13 Mar 2024 09:04:47 -0400
Subject: [PATCH 7/7] Update base/workflow/R/runModule.run.write.configs.R

Co-authored-by: Chris Black <chris@ckblack.org>
---
 base/workflow/R/runModule.run.write.configs.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/base/workflow/R/runModule.run.write.configs.R b/base/workflow/R/runModule.run.write.configs.R
index 1bddef506f1..200677b5cf4 100644
--- a/base/workflow/R/runModule.run.write.configs.R
+++ b/base/workflow/R/runModule.run.write.configs.R
@@ -12,7 +12,11 @@ runModule.run.write.configs <- function(settings, overwrite = TRUE, use.existing
       PEcAn.logger::logger.warn("Existing runs.txt file will be removed.")
       unlink(file.path(settings$rundir, "runs.txt"))
     }
-    return(PEcAn.settings::papply(settings, runModule.run.write.configs, overwrite = overwrite,use.existing.samples=use.existing.samples))
+    return(PEcAn.settings::papply(
+      settings,
+      runModule.run.write.configs,
+      overwrite = overwrite,
+      use.existing.samples = use.existing.samples))
   } else if (PEcAn.settings::is.Settings(settings)) {
     write <- settings$database$bety$write
     # double check making sure we have method for parameter sampling