From fc1eebae2dd0bfecbdf2fd0a499a34e253386638 Mon Sep 17 00:00:00 2001 From: LTLA Date: Sun, 26 Nov 2023 23:50:52 -0800 Subject: [PATCH] More coordinated optimization of storage layouts for HDF5. --- .gitignore | 2 ++ DESCRIPTION | 1 + R/{optimizeStorage.R => optimize_storage.R} | 7 ++++++- .../collect_float_attributes.cpp | 10 +++++----- 4 files changed, 14 insertions(+), 6 deletions(-) rename R/{optimizeStorage.R => optimize_storage.R} (96%) rename collect_float_attributes.cpp => src/collect_float_attributes.cpp (92%) diff --git a/.gitignore b/.gitignore index 1c56872..d291ee9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ *.swp *.rds *.html +*.so +*.o diff --git a/DESCRIPTION b/DESCRIPTION index 67d4b5a..ac7bd8a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -24,6 +24,7 @@ Suggests: knitr, BiocStyle, chihaya +LinkingTo: Rcpp VignetteBuilder: knitr RoxygenNote: 7.2.3 biocViews: diff --git a/R/optimizeStorage.R b/R/optimize_storage.R similarity index 96% rename from R/optimizeStorage.R rename to R/optimize_storage.R index 713121b..1c63f87 100644 --- a/R/optimizeStorage.R +++ b/R/optimize_storage.R @@ -19,6 +19,7 @@ setMethod("collect_integer_attributes", "array", function(x) { }) setMethod("collect_integer_attributes", "ANY", function(x) { + collated <- blockApply(x, collect_integer_attributes) list( range=aggregate_range(collated, "range"), @@ -133,9 +134,13 @@ optimize_float_storage <- function(x) { } else if (!attr$has_max) { placeholder <- max_double() } + + # Fallback that just goes through and pulls out all unique values. if (is.null(placeholder)) { - warning("cannot guess a suitable missing value placeholder, treating all NAs as NaNs") + u <- Reduce(union, blockApply(x, function(y) unique(as.vector(y)))) + placeholder <- chooseMissingPlaceholderForHdf5(u) } + return(list(type="H5T_NATIVE_DOUBLE", placeholder=placeholder)) } else { diff --git a/collect_float_attributes.cpp b/src/collect_float_attributes.cpp similarity index 92% rename from collect_float_attributes.cpp rename to src/collect_float_attributes.cpp index 82a4e9a..9023ac1 100644 --- a/collect_float_attributes.cpp +++ b/src/collect_float_attributes.cpp @@ -62,8 +62,8 @@ Rcpp::List collect_double_attributes(Rcpp::NumericVector x) { if (!non_integer) { for (auto y : x) { if (!ISNA(y)) { - minv = std::min(y, v); - maxv = std::max(y, v); + minv = std::min(y, minv); + maxv = std::max(y, maxv); } } } @@ -71,7 +71,7 @@ Rcpp::List collect_double_attributes(Rcpp::NumericVector x) { bool has_lowest = false, has_highest = false; { - constexpr double lowest = lowest_double(); + double lowest = lowest_double(); for (auto y : x) { if (!ISNA(y) && y == lowest) { has_lowest = true; @@ -79,7 +79,7 @@ Rcpp::List collect_double_attributes(Rcpp::NumericVector x) { } } - constexpr double highest = max_double(); + double highest = max_double(); for (auto y : x) { if (!ISNA(y) && y == highest) { has_highest = true; @@ -88,7 +88,7 @@ Rcpp::List collect_double_attributes(Rcpp::NumericVector x) { } } - return Rcpp::List( + return Rcpp::List::create( Rcpp::Named("range") = Rcpp::NumericVector::create(minv, maxv), Rcpp::Named("missing") = Rcpp::LogicalVector::create(has_missing), Rcpp::Named("non_integer") = Rcpp::LogicalVector::create(non_integer),