From 39d07434dd5b719a56f70a383cb381408535c0fd Mon Sep 17 00:00:00 2001 From: Kelli Johnson Date: Fri, 12 Jul 2024 10:13:32 -0700 Subject: [PATCH] fix(FIMSFrame): Assume yyyy-mm-dd format for dates and check column names in FIMSFrame The column names of the S4@data object were being checked with the validation function but there was no check in FIMSFrame and to do the calculations within FIMSFrame all of the columns need to be present. Right now we are checking it twice, once in FIMSFrame and once in the validator of the FIMSFrame class. We might need a different class like an input data class and we could check the class upon input to FIMSFrame but this works for right now. @k-doering-noaa do you have any ideas here? Had to change the test of a data frame without the proper columns to just error out because no warnings are given just a stop() command. When writing a data frame to a csv and reading it back in, the date formatting can be lost, e.g., 0001-01-01 turns into 1-1-1. FIMS requires a yyyy-mm-dd format. Now the use of the as.Date() function will create a date object from a character object but only if it is in the correct format. If it is in the wrong format, e.g., yyyy/mm/dd, then the function will error out. Right now the start_year and end_year are formatted as integers because for plotting, I thought it would be better to have year 1 versus year 0001 but we can change this. @ian-taylor-NOAA what do you think? Close #639 --------- Co-authored-by: Kelli.Johnson <1536491690113302@mil> --- R/fimsframe.R | 100 ++++++++++++++++++++++---------- tests/testthat/test-fimsframe.R | 2 +- 2 files changed, 71 insertions(+), 31 deletions(-) diff --git a/R/fimsframe.R b/R/fimsframe.R index 24d5f846..a5bc3c39 100644 --- a/R/fimsframe.R +++ b/R/fimsframe.R @@ -225,31 +225,25 @@ setValidity( errors <- c(errors, "data must have at least one row") } - # Check columns - if (!"type" %in% colnames(object@data)) { - errors <- c(errors, "data must contain 'type'") - } - if (!"datestart" %in% colnames(object@data)) { - errors <- c(errors, "data must contain 'datestart'") - } - if (!"dateend" %in% colnames(object@data)) { - errors <- c(errors, "data must contain 'dateend'") - } - if (!"dateend" %in% colnames(object@data)) { - errors <- c(errors, "data must contain 'value'") - } - if (!"dateend" %in% colnames(object@data)) { - errors <- c(errors, "data must contain 'unit'") - } - if (!"dateend" %in% colnames(object@data)) { - errors <- c(errors, "data must contain 'uncertainty'") + errors <- c(errors, validate_data_colnames(object@data)) + + # Add checks for other slots + # Check the format for acceptable variants of the ideal yyyy-mm-dd + grepl_datestart <- grepl( + "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1-2}", + data_mile1[["datestart"]] + ) + grepl_dateend <- grepl( + "[0-9]{1,4}-[0-9]{1,2}-[0-9]{1-2}", + data_mile1[["dateend"]] + ) + if (!all(grepl_datestart)) { + errors <- c(errors, "datestart must be in 'yyyy-mm-dd' format") } - if (!"age" %in% colnames(object@data)) { - errors <- c(errors, "data must contain 'age'") + if (!all(grepl_dateend)) { + errors <- c(errors, "dateend must be in 'yyyy-mm-dd' format") } - # TODO: Add checks for other slots - # Return if (length(errors) == 0) { return(TRUE) @@ -259,6 +253,36 @@ setValidity( } ) +validate_data_colnames <- function(data) { + the_column_names <- colnames(data) + errors <- character() + if (!"type" %in% the_column_names) { + errors <- c(errors, "data must contain 'type'") + } + if (!"name" %in% the_column_names) { + errors <- c(errors, "data must contain 'name'") + } + if (!"datestart" %in% the_column_names) { + errors <- c(errors, "data must contain 'datestart'") + } + if (!"dateend" %in% the_column_names) { + errors <- c(errors, "data must contain 'dateend'") + } + if (!"dateend" %in% the_column_names) { + errors <- c(errors, "data must contain 'value'") + } + if (!"dateend" %in% the_column_names) { + errors <- c(errors, "data must contain 'unit'") + } + if (!"dateend" %in% the_column_names) { + errors <- c(errors, "data must contain 'uncertainty'") + } + if (!"age" %in% the_column_names) { + errors <- c(errors, "data must contain 'age'") + } + return(errors) +} + # Constructors ---- # All constructors in this file are documented in 1 roxygen file via @rdname. @@ -281,14 +305,30 @@ setValidity( #' on the child class. Use [showClass()] to see all available slots. #' @export FIMSFrame <- function(data) { - # Get the earliest and latest year of data and use to calculate n years for - # population simulation - start_year <- as.integer( - strsplit(min(data[["datestart"]], na.rm = TRUE), "-")[[1]][1] - ) - end_year <- as.integer( - strsplit(max(data[["dateend"]], na.rm = TRUE), "-")[[1]][1] - ) + errors <- validate_data_colnames(data) + if (length(errors) > 0) { + stop( + "Check the columns of your data, the following are missing:\n", + paste(errors, sep = "\n", collapse = "\n") + ) + } + # datestart and dateend need to be date classes so leading zeros are present + # but writing and reading from csv file removes the classes so they must be + # enforced here + # e.g., 0004-01-01 for January 01 0004 + date_formats <- c("%Y-%m-%d") + data[["datestart"]] <- as.Date(data[["datestart"]], tryFormats = date_formats) + data[["dateend"]] <- as.Date(data[["dateend"]], tryFormats = date_formats) + + # Get the earliest and latest year formatted as a string of 4 integers + start_year <- as.integer(format( + as.Date(min(data[["datestart"]], na.rm = TRUE), tryFormats = date_formats), + "%Y" + )) + end_year <- as.integer(format( + as.Date(max(data[["dateend"]], na.rm = TRUE), tryFormats = date_formats), + "%Y" + )) n_years <- as.integer(end_year - start_year + 1) years <- start_year:end_year diff --git a/tests/testthat/test-fimsframe.R b/tests/testthat/test-fimsframe.R index 0dbfafcd..f50a4c66 100644 --- a/tests/testthat/test-fimsframe.R +++ b/tests/testthat/test-fimsframe.R @@ -73,7 +73,7 @@ test_that("Show method works as expected", { test_that("Validators work as expected", { bad_input <- data.frame(test = 1, test2 = 2) - expect_warning(expect_error(FIMSFrame(bad_input))) + expect_error(FIMSFrame(bad_input)) }) n_years <- fims_frame@n_years