diff --git a/.Rbuildignore b/.Rbuildignore index 9300917..5571076 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -12,3 +12,4 @@ training_data.rds ^pkgdown$ ^CODE_OF_CONDUCT\.md$ ^README\.Rmd$ +data-raw diff --git a/DESCRIPTION b/DESCRIPTION index dd82226..b0b1305 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -42,6 +42,7 @@ Suggests: scales, testthat (>= 3.0.0), tidync, + tigris, viridis Config/testthat/edition: 3 Config/testthat/parallel: true diff --git a/R/helper.R b/R/helper.R index 9fce8fc..46fb303 100644 --- a/R/helper.R +++ b/R/helper.R @@ -7,8 +7,7 @@ } utils::globalVariables(c( - "s2", - "NAME", + "s2", "NAME", "Sample Duration", "Observation Percent", "State Code", "County Code", "Site Num", "Latitude", "Longitude", "Arithmetic Mean", "Date Local", @@ -17,16 +16,11 @@ utils::globalVariables(c( "SO4SMASS", "merra_dust", "merra_oc", "merra_oc", "merra_bc", "merra_ss", "merra_so4", "value", "d", "pollutant code", "site longitude", "site latitude", "total_emissions", - "dist_to_point", "air.2m", "hpbl", "acpcp", "rhum.2m", "vis", "pres.sfc", "uwnd.10m", "vwnd.10m", - "urban_imperviousness", "merra_pm25", "plume_smoke", ".rowid", - "nei_point_id2w", "census_tract_id_2010", - "predictions", "variance.estimates", - "aadt_total", "aadt_total_m", "aadt_truck", "aadt_truck_m", - "AADT", "AADT_COMBINATION", "AADT_SINGLE_UNIT", "Shape", "s2_centroid" + "predictions", "variance.estimates" )) #' Get the closest years to a vector of dates diff --git a/README.Rmd b/README.Rmd index 8da8abc..a7300e6 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,8 +16,6 @@ knitr::opts_chunk$set( # Air Pollution Prediction Commons -[![Lifecycle: -experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![R-CMD-check](https://github.com/geomarker-io/appc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/geomarker-io/appc/actions/workflows/R-CMD-check.yaml) [![CRAN 
status](https://www.r-pkg.org/badges/version/appc)](https://CRAN.R-project.org/package=appc) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) @@ -28,7 +26,7 @@ experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](h The goal of the appc package is to provide daily, high resolution, near real-time, model-based ambient air pollution exposure assessments. This is achieved by training a generalized random forest on several geomarkers to predict daily average EPA AQS concentrations from 2017 until the present at exact locations across the contiguous United States (see `vignette("cv-model-performance")` for more details). The appc package contains functions for generating geomarker predictors and the ambient air pollution concentrations. -Predictor geomarkers include weather and atmospheric information, traffic on primary roadways, urban imperviousness, wildfire smoke, industrial emissions, elevation, spatiotemporal indicators, and satellite-based aerosol diagnostics data. +Predictor geomarkers include weather and atmospheric information, wildfire smoke plumes, elevation, and satellite-based aerosol diagnostics products. Source files included with the package train and evaluate models that can be updated with any release to use more recent AQS measurements and/or geomarker predictors. 
## Installing @@ -74,12 +72,10 @@ Spatiotemporal geomarkers are used for predicting air pollution concentrations, |-----------|---------------| | 🌦 weather & atmospheric conditions | `get_narr_data()` | | 🛰 satellite-based aerosol diagnostics | `get_merra_data()` | -| 🚍 traffic densities | `get_traffic_summary()` | -| 🏙 urban imperviousness | `get_urban_imperviousness()` | | 🔥 wildfire smoke | `get_hms_smoke_data()` | -| 🏭 industrial emissions | `get_nei_point_summary()` | | 🗻 elevation | `get_elevation_summary()` | -| 🔗 census tract identifier | `get_census_tract_id()` | + +Currently, `get_urban_imperviousness()`, `get_traffic_summary()`, and `get_nei_point_summary()` are stashed in the `/inst` folder and not integrated into this package. ## Developing @@ -96,12 +92,9 @@ Available recipes: dl_geomarker_data # download all geomarker ahead of time, if not already cached docker_test # run tests without cached release files docker_tool # build docker image preloaded with {appc} and data + make_training_data # make training data for GRF release_hms_smoke_data # install smoke data from source and upload to github release release_merra_data # upload merra data to github release release_model # upload grf model and training data to current github release - release_nei_data # install nei data from source and upload to github release - release_smoke_data # install smoke data from source and upload to github release - release_traffic_data # install traffic data from source and upload to github release - release_urban_imperviousness_data # install nlcd urban imperviousness data from source and upload to github release train_model # train grf model and render report ``` diff --git a/README.md b/README.md index 46efb82..43f667c 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,6 @@ -[![Lifecycle: -experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) 
[![R-CMD-check](https://github.com/geomarker-io/appc/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/geomarker-io/appc/actions/workflows/R-CMD-check.yaml) [![CRAN status](https://www.r-pkg.org/badges/version/appc)](https://CRAN.R-project.org/package=appc) @@ -24,12 +22,10 @@ until the present at exact locations across the contiguous United States (see `vignette("cv-model-performance")` for more details). The appc package contains functions for generating geomarker predictors and the ambient air pollution concentrations. Predictor geomarkers include -weather and atmospheric information, traffic on primary roadways, urban -imperviousness, wildfire smoke, industrial emissions, elevation, -spatiotemporal indicators, and satellite-based aerosol diagnostics data. -Source files included with the package train and evaluate models that -can be updated with any release to use more recent AQS measurements -and/or geomarker predictors. +weather and atmospheric information, wildfire smoke plumes, elevation, +and satellite-based aerosol diagnostics products. Source files included +with the package train and evaluate models that can be updated with any +release to use more recent AQS measurements and/or geomarker predictors. 
## Installing @@ -52,25 +48,25 @@ appc::predict_pm25( dates = list(as.Date(c("2023-05-18", "2023-11-06")), as.Date(c("2023-06-22", "2023-08-15"))) ) #> ℹ (down)loading random forest model -#> ✔ (down)loading random forest model [8.3s] +#> ✔ (down)loading random forest model [8.2s] #> #> ℹ checking that s2 locations are within the contiguous united states -#> ✔ checking that s2 locations are within the contiguous united states [54ms] +#> ✔ checking that s2 locations are within the contiguous united states [55ms] #> #> ℹ adding coordinates #> ✔ adding coordinates [1.3s] #> #> ℹ adding elevation -#> ✔ adding elevation [1.4s] +#> ✔ adding elevation [1.3s] #> #> ℹ adding HMS smoke data -#> ✔ adding HMS smoke data [986ms] +#> ✔ adding HMS smoke data [967ms] #> #> ℹ adding NARR #> ✔ adding NARR [3.1s] #> #> ℹ adding MERRA -#> ✔ adding MERRA [556ms] +#> ✔ adding MERRA [569ms] #> #> ℹ adding time components #> ✔ adding time components [24ms] @@ -123,16 +119,16 @@ Spatiotemporal geomarkers are used for predicting air pollution concentrations, but also serve as exposures or confounding exposures themselves. 
View information and options about each geomarker: -| geomarker | appc function | -|---------------------------------------|------------------------------| -| 🌦 weather & atmospheric conditions | `get_narr_data()` | -| 🛰 satellite-based aerosol diagnostics | `get_merra_data()` | -| 🚍 traffic densities | `get_traffic_summary()` | -| 🏙 urban imperviousness | `get_urban_imperviousness()` | -| 🔥 wildfire smoke | `get_hms_smoke_data()` | -| 🏭 industrial emissions | `get_nei_point_summary()` | -| 🗻 elevation | `get_elevation_summary()` | -| 🔗 census tract identifier | `get_census_tract_id()` | +| geomarker | appc function | +|---------------------------------------|---------------------------| +| 🌦 weather & atmospheric conditions | `get_narr_data()` | +| 🛰 satellite-based aerosol diagnostics | `get_merra_data()` | +| 🔥 wildfire smoke | `get_hms_smoke_data()` | +| 🗻 elevation | `get_elevation_summary()` | + +Currently, `get_urban_imperviousness()`, `get_traffic_summary()`, and +`get_nei_point_summary()` are stashed in the `/inst` folder and not +integrated into this package. 
## Developing @@ -154,12 +150,9 @@ Available recipes: dl_geomarker_data # download all geomarker ahead of time, if not already cached docker_test # run tests without cached release files docker_tool # build docker image preloaded with {appc} and data + make_training_data # make training data for GRF release_hms_smoke_data # install smoke data from source and upload to github release release_merra_data # upload merra data to github release release_model # upload grf model and training data to current github release - release_nei_data # install nei data from source and upload to github release - release_smoke_data # install smoke data from source and upload to github release - release_traffic_data # install traffic data from source and upload to github release - release_urban_imperviousness_data # install nlcd urban imperviousness data from source and upload to github release train_model # train grf model and render report ``` diff --git a/_pkgdown.yml b/_pkgdown.yml index dfe9a91..adfd516 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -6,9 +6,6 @@ reference: contents: - get_elevation_summary - get_narr_data - - get_nei_point_summary - - get_urban_imperviousness - - get_traffic_summary - get_merra_data - get_hms_smoke_data - title: Air pollution assessment diff --git a/justfile b/justfile index 6dbcf62..cc77608 100644 --- a/justfile +++ b/justfile @@ -20,12 +20,9 @@ dl_geomarker_data: #!/usr/bin/env Rscript library(appc) install_elevation_data() - install_traffic() tidyr::expand_grid(narr_var = c("air.2m", "hpbl", "acpcp", "rhum.2m", "vis", "pres.sfc", "uwnd.10m", "vwnd.10m"), narr_year = as.character(2017:2023)) |> purrr::pmap_chr(install_narr_data) - purrr::map_chr(c("2017", "2020"), install_nei_point_data) - purrr::map_chr(c("2016", "2019", "2021"), install_urban_imperviousness) install_hms_smoke_data() purrr::map_chr(as.character(2017:2023), install_merra_data)