Skip to content

Commit

Permalink
More coordinated optimization of storage layouts for HDF5.
Browse files: browse the repository at this point in the history
  • Loading branch information
LTLA committed Nov 27, 2023
1 parent 85d2b90 commit fc1eeba
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 6 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
*.swp
*.rds
*.html
*.so
*.o
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Suggests:
knitr,
BiocStyle,
chihaya
LinkingTo: Rcpp
VignetteBuilder: knitr
RoxygenNote: 7.2.3
biocViews:
Expand Down
7 changes: 6 additions & 1 deletion R/optimizeStorage.R → R/optimize_storage.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ setMethod("collect_integer_attributes", "array", function(x) {
})

setMethod("collect_integer_attributes", "ANY", function(x) {

collated <- blockApply(x, collect_integer_attributes)
list(
range=aggregate_range(collated, "range"),
Expand Down Expand Up @@ -133,9 +134,13 @@ optimize_float_storage <- function(x) {
} else if (!attr$has_max) {
placeholder <- max_double()
}

# Fallback that just goes through and pulls out all unique values.
if (is.null(placeholder)) {
warning("cannot guess a suitable missing value placeholder, treating all NAs as NaNs")
u <- Reduce(union, blockApply(x, function(y) unique(as.vector(y))))
placeholder <- chooseMissingPlaceholderForHdf5(u)
}

return(list(type="H5T_NATIVE_DOUBLE", placeholder=placeholder))

} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,24 +62,24 @@ Rcpp::List collect_double_attributes(Rcpp::NumericVector x) {
if (!non_integer) {
for (auto y : x) {
if (!ISNA(y)) {
- minv = std::min(y, v);
- maxv = std::max(y, v);
+ minv = std::min(y, minv);
+ maxv = std::max(y, maxv);
}
}
}
}

bool has_lowest = false, has_highest = false;
{
- constexpr double lowest = lowest_double();
+ double lowest = lowest_double();
for (auto y : x) {
if (!ISNA(y) && y == lowest) {
has_lowest = true;
break;
}
}

- constexpr double highest = max_double();
+ double highest = max_double();
for (auto y : x) {
if (!ISNA(y) && y == highest) {
has_highest = true;
Expand All @@ -88,7 +88,7 @@ Rcpp::List collect_double_attributes(Rcpp::NumericVector x) {
}
}

- return Rcpp::List(
+ return Rcpp::List::create(
Rcpp::Named("range") = Rcpp::NumericVector::create(minv, maxv),
Rcpp::Named("missing") = Rcpp::LogicalVector::create(has_missing),
Rcpp::Named("non_integer") = Rcpp::LogicalVector::create(non_integer),
Expand Down

0 comments on commit fc1eeba

Please sign in to comment.