From 0a88c22d02ab2d7c7b79b5c3fa982826289916ef Mon Sep 17 00:00:00 2001 From: LTLA Date: Sun, 26 Nov 2023 23:11:16 -0800 Subject: [PATCH] Switched the savers/readers for dense arrays to the new world. --- NAMESPACE | 2 + R/readArray.R | 26 ++++++++----- R/saveArray.R | 11 ++++-- R/utils.R | 4 +- man/readArray.Rd | 17 ++++---- man/saveArray.Rd | 3 ++ .../{test-stage-array.R => test-save-array.R} | 39 ++++++++++++++++++- 7 files changed, 79 insertions(+), 23 deletions(-) rename tests/testthat/{test-stage-array.R => test-save-array.R} (86%) diff --git a/NAMESPACE b/NAMESPACE index d1a7ed1..1a5ae73 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,6 +43,7 @@ importClassesFrom(Matrix,dgCMatrix) importClassesFrom(Matrix,dsparseMatrix) importClassesFrom(SparseArray,SVT_SparseMatrix) importFrom(BiocGenerics,path) +importFrom(DelayedArray,"type<-") importFrom(DelayedArray,DelayedArray) importFrom(DelayedArray,OLD_extract_sparse_array) importFrom(DelayedArray,acbind) @@ -63,6 +64,7 @@ importFrom(DelayedArray,read_sparse_block) importFrom(DelayedArray,rowAutoGrid) importFrom(DelayedArray,type) importFrom(HDF5Array,H5SparseMatrixSeed) +importFrom(HDF5Array,HDF5Array) importFrom(HDF5Array,HDF5ArraySeed) importFrom(HDF5Array,writeHDF5Array) importFrom(S4Vectors,new2) diff --git a/R/readArray.R b/R/readArray.R index b9d4d87..9e92952 100644 --- a/R/readArray.R +++ b/R/readArray.R @@ -1,13 +1,15 @@ -#' Read high-dimensional arrays from disk +#' Read a dense array from disk #' -#' Read arrays from on-disk formats, using the corresponding \code{\link{stageObject}} method. -#' It should not be necessary for users to call this function manually. +#' Read a dense high-dimensional array from its on-disk representation. #' -#' @param info Named list containing the metadata for this array. -#' @param project Any argument accepted by the acquisition functions, see \code{?\link{acquireFile}}. -#' By default, this should be a string containing the path to a staging directory. +#' @param path String containing a path to a directory, itself created by the \code{\link{saveObject}} method for a dense array. +#' @param array.file.backed Logical scalar indicating whether to return a file-backed S4 array class, or an ordinary R array in memory. +#' @param ... Further arguments, ignored. #' -#' @return A multi-dimensional object (usually a \linkS4class{DelayedMatrix}) containing the array data. +#' @return A multi-dimensional array-like object, either a \linkS4class{DelayedArray} or an ordinary R array. +#' +#' @seealso +#' \code{"\link{saveObject,array-method}"}, to create the directory and its contents. #' #' @author Aaron Lun #' @@ -24,7 +26,8 @@ #' readArray(dir) #' #' @export -#' @import HDF5Array +#' @importFrom HDF5Array HDF5Array +#' @importFrom DelayedArray type<- readArray <- function(path, array.file.backed=TRUE, ...) { fpath <- file.path(path, "array.h5") @@ -47,7 +50,10 @@ readArray <- function(path, array.file.backed=TRUE, ...) { list(type=type, names=names, transposed=(transposed != 0L), placeholder=placeholder) }) - out <- HDF5Array(filepath=fpath, name="dense_array/data", type=from_array_type(details$type)) + out <- HDF5Array(filepath=fpath, name="dense_array/data") + if (type(out) == "raw") { # ... so that placeholders are correctly substituted. + type(out) <- "integer" + } if (!is.null(details$names)) { dimnames(out) <- rev(details$names) @@ -57,7 +63,9 @@ readArray <- function(path, array.file.backed=TRUE, ...) { } if (!is.null(details$placeholder)) { out <- DelayedMask(out, placeholder=details$placeholder) + out <- DelayedArray(out) } + type(out) <- from_array_type(details$type) if (!array.file.backed) { return(as.array(out)) diff --git a/R/saveArray.R b/R/saveArray.R index 75fe050..694b508 100644 --- a/R/saveArray.R +++ b/R/saveArray.R @@ -10,6 +10,9 @@ #' @return #' \code{x} is saved to \code{path} and \code{NULL} is invisibly returned. #' +#' @seealso +#' \code{\link{readArray}}, to read the directory contents back into the R session. +#' #' @author Aaron Lun #' @examples #' mat <- array(rpois(10000, 10), c(50, 20, 10)) @@ -24,8 +27,9 @@ #' list.files(dir) #' #' @name saveArray -#' @aliases stageObject,array-method -#' @aliases stageObject,Matrix-method +#' @aliases +#' stageObject,array-method +#' stageObject,Matrix-method NULL #' @import alabaster.base rhdf5 @@ -60,7 +64,7 @@ NULL if (!is.null(transformed$placeholder)) { dhandle <- H5Dopen(ghandle, "data") on.exit(H5Dclose(dhandle), add=TRUE, after=FALSE) - h5_write_attribute(dhandle, missingPlaceholderName, transformed$placeholder) + h5_write_attribute(dhandle, missingPlaceholderName, transformed$placeholder, scalar=TRUE) } save_names(ghandle, x, transpose=TRUE) @@ -138,7 +142,6 @@ setMethod("saveObject", "denseMatrix", .save_array) } #' @export -#' @rdname stageArray setMethod("stageObject", "array", function(x, dir, path, child=FALSE) .stage_array(x, dir, path, child=child)) #' @importFrom alabaster.base .stageObject diff --git a/R/utils.R b/R/utils.R index 84ce0e0..fddd7d4 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,5 +1,5 @@ #' @importFrom DelayedArray type -array_type <- function(x) { +to_array_type <- function(x) { switch(type(x), integer="integer", double="number", @@ -9,6 +9,8 @@ array_type <- function(x) { ) } +array_type <- to_array_type + from_array_type <- function(x) { switch(x, integer="integer", diff --git a/man/readArray.Rd b/man/readArray.Rd index 46f34af..dbb4cc6 100644 --- a/man/readArray.Rd +++ b/man/readArray.Rd @@ -2,22 +2,22 @@ % Please edit documentation in R/readArray.R \name{readArray} \alias{readArray} -\title{Read high-dimensional arrays from disk} +\title{Read a dense array from disk} \usage{ readArray(path, array.file.backed = TRUE, ...) } \arguments{ -\item{info}{Named list containing the metadata for this array.} +\item{path}{String containing a path to a directory, itself created by the \code{\link{saveObject}} method for a dense array.} -\item{project}{Any argument accepted by the acquisition functions, see \code{?\link{acquireFile}}. -By default, this should be a string containing the path to a staging directory.} +\item{array.file.backed}{Logical scalar indicating whether to return a file-backed S4 array class, or an ordinary R array in memory.} + +\item{...}{Further arguments, ignored.} } \value{ -A multi-dimensional object (usually a \linkS4class{DelayedMatrix}) containing the array data. +A multi-dimensional array-like object, either a \linkS4class{DelayedArray} or an ordinary R array. } \description{ -Read arrays from on-disk formats, using the corresponding \code{\link{stageObject}} method. -It should not be necessary for users to call this function manually. +Read a dense high-dimensional array from its on-disk representation. } \examples{ arr <- array(rpois(10000, 10), c(50, 20, 10)) @@ -31,6 +31,9 @@ dir <- tempfile() saveObject(arr, dir) readArray(dir) +} +\seealso{ +\code{"\link{saveObject,array-method}"}, to create the directory and its contents. } \author{ Aaron Lun diff --git a/man/saveArray.Rd b/man/saveArray.Rd index 6878181..146686e 100644 --- a/man/saveArray.Rd +++ b/man/saveArray.Rd @@ -38,6 +38,9 @@ dir <- tempfile() saveObject(mat, dir) list.files(dir) +} +\seealso{ +\code{\link{readArray}}, to read the directory contents back into the R session. } \author{ Aaron Lun diff --git a/tests/testthat/test-stage-array.R b/tests/testthat/test-save-array.R similarity index 86% rename from tests/testthat/test-stage-array.R rename to tests/testthat/test-save-array.R index e5c1084..1de5d11 100644 --- a/tests/testthat/test-stage-array.R +++ b/tests/testthat/test-save-array.R @@ -1,5 +1,5 @@ # This tests the stageObject generic for arrays. -# library(testthat); library(alabaster.matrix); source("test-stage-array.R") +# library(testthat); library(alabaster.matrix); source("test-save-array.R") experiment <- "rnaseq" assay <- "counts" @@ -27,6 +27,11 @@ test_that("stageObject works as expected for the default method", { # Checking that metadata save works. expect_error(alabaster.base::.writeMetadata(info, dir=dir), NA) + + # Trying in the new world. + tmp <- tempfile() + saveObject(arr, tmp) + expect_identical(as.array(readArray(tmp)), arr) }) test_that("stageObject works as expected for NA values", { @@ -44,6 +49,10 @@ test_that("stageObject works as expected for NA values", { arr2 <- loadArray(info, project=dir) expect_identical(DelayedArray::type(arr2), "integer") expect_equal(arr, as.array(arr2)) + + tmp <- tempfile() + saveObject(arr, tmp) + expect_identical(as.array(readArray(tmp)), arr) } arr[1] <- NA @@ -56,6 +65,10 @@ test_that("stageObject works as expected for NA values", { arr2 <- loadArray(info, project=dir) expect_identical(DelayedArray::type(arr2), "integer") expect_equal(arr, as.array(arr2)) + + tmp <- tempfile() + saveObject(arr, tmp) + expect_identical(as.array(readArray(tmp)), arr) } storage.mode(arr) <- "double" @@ -66,11 +79,15 @@ test_that("stageObject works as expected for NA values", { info <- stageObject(arr, dir, file.path(experiment, "assay-2")) fpath <- file.path(odir, "assay-2/array.h5") out <- rhdf5::h5readAttributes(fpath, "data") - expect_identical(out[["missing-value-placeholder"]], NA_real_) + #expect_identical(out[["missing-value-placeholder"]], alabaster.matrix:::lowest_double()) arr2 <- loadArray(info, project=dir) expect_identical(DelayedArray::type(arr2), "double") expect_equal(arr, as.array(arr2)) + + tmp <- tempfile() + saveObject(arr, tmp) + expect_identical(as.array(readArray(tmp)), arr) } }) @@ -87,6 +104,10 @@ test_that("stageObject works as expected without dimnames", { expect_equal(sum(arr2), sum(arr)) expect_null(rownames(arr2)) expect_null(colnames(arr2)) + + tmp <- tempfile() + saveObject(arr, tmp) + expect_identical(as.array(readArray(tmp)), arr) }) test_that("stageObject works with DelayedArrays", { @@ -247,6 +268,20 @@ test_that("reading arrays work with non-default NA placeholders", { ref <- as.matrix(x) ref[ref==0L] <- NA expect_identical(ref, as.matrix(arr2)) + + # Trying in the new world. + tmp <- tempfile() + saveObject(x, tmp) + local({ + fhandle <- H5Fopen(file.path(tmp, "array.h5")) + on.exit(H5Fclose(fhandle), add=TRUE, after=FALSE) + ghandle <- H5Gopen(fhandle, "dense_array") + on.exit(H5Gclose(ghandle), add=TRUE, after=FALSE) + dhandle <- H5Dopen(ghandle, "data") + on.exit(H5Dclose(dhandle), add=TRUE, after=FALSE) + alabaster.base::h5_write_attribute(dhandle, "missing-value-placeholder", 0L, scalar=TRUE) + }) + expect_identical(ref, as.matrix(readArray(tmp))) } # Sparse case.