Skip to content

Commit

Permalink
Updated package to use chunk_grid in the latest delayedarray.
Browse files Browse the repository at this point in the history
  • Loading branch information
LTLA committed Feb 1, 2024
1 parent 7c088e0 commit 3fd21cb
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 19 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ python_requires = >=3.8
# For more information, check out https://semver.org/.
install_requires =
importlib-metadata; python_version<"3.8"
delayedarray>=0.4.0
delayedarray>=0.5.0
numpy
h5py

Expand Down
18 changes: 12 additions & 6 deletions src/hdf5array/Hdf5CompressedSparseMatrixSeed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Optional, Sequence, Tuple, Callable, Literal
from delayedarray import extract_dense_array, extract_sparse_array, chunk_shape, DelayedArray, wrap, is_sparse, SparseNdarray, is_masked
from delayedarray import extract_dense_array, extract_sparse_array, chunk_grid, DelayedArray, wrap, is_sparse, SparseNdarray, is_masked, chunk_shape_to_grid
from h5py import File
import numpy
from numpy import ndarray, dtype, integer, zeros, issubdtype, array
Expand Down Expand Up @@ -200,13 +200,19 @@ def is_sparse_Hdf5CompressedSparseMatrixSeed(x: Hdf5CompressedSparseMatrixSeed):
return True


@chunk_shape.register
def chunk_shape_Hdf5CompressedSparseMatrixSeed(x: Hdf5CompressedSparseMatrixSeed):
"""See :py:meth:`~delayedarray.chunk_shape.chunk_shape`."""
@chunk_grid.register
def chunk_grid_Hdf5CompressedSparseMatrixSeed(x: Hdf5CompressedSparseMatrixSeed):
"""
See :py:meth:`~delayedarray.chunk_grid.chunk_grid`.
The cost factor is set to 20 to reflect the computational work involved in
extracting data from disk.
"""
if x._by_column:
return (x._shape[0], 1)
chunks = (x._shape[0], 1)
else:
return (1, x._shape[1])
chunks = (1, x._shape[1])
return chunk_shape_to_grid(chunks, x.shape, cost_factor=20)


def _extract_array(
Expand Down
15 changes: 10 additions & 5 deletions src/hdf5array/Hdf5DenseArraySeed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Optional, Sequence, Tuple, Union
from delayedarray import extract_dense_array, chunk_shape, DelayedArray, wrap, is_masked
from delayedarray import extract_dense_array, chunk_grid, DelayedArray, wrap, is_masked, chunk_shape_to_grid
from h5py import File
import numpy
from numpy import ndarray, dtype, asfortranarray, ix_
Expand Down Expand Up @@ -98,10 +98,15 @@ def name(self) -> str:
return self._name


@chunk_shape.register
def chunk_shape_Hdf5DenseArraySeed(x: Hdf5DenseArraySeed):
"""See :py:meth:`~delayedarray.chunk_shape.chunk_shape`."""
return x._chunks
@chunk_grid.register
def chunk_grid_Hdf5DenseArraySeed(x: Hdf5DenseArraySeed):
    """
    Build the chunk grid for a dense HDF5-backed seed.

    See :py:meth:`~delayedarray.chunk_grid.chunk_grid`. The grid is derived
    from the seed's ``_chunks`` and ``_shape`` attributes. The cost factor is
    set to 20 to reflect the computational work involved in extracting data
    from disk.
    """
    chunk_dims = x._chunks
    full_extent = x._shape
    return chunk_shape_to_grid(chunk_dims, full_extent, cost_factor=20)


@extract_dense_array.register
Expand Down
10 changes: 6 additions & 4 deletions tests/test_Hdf5CompressedSparseMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import tempfile
import scipy.sparse

from utils import chunk_shape

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"
Expand All @@ -30,7 +32,7 @@ def test_Hdf5CompressedSparseMatrix_column():

assert arr.shape == shape
assert arr.dtype == y.dtype
assert delayedarray.chunk_shape(arr) == (100, 1)
assert chunk_shape(arr) == (100, 1)
assert (delayedarray.to_dense_array(arr) == y.toarray()).all()
assert not delayedarray.is_masked(arr)

Expand Down Expand Up @@ -63,7 +65,7 @@ def test_Hdf5CompressedSparseMatrix_row():

assert arr.shape == shape
assert arr.dtype == y.dtype
assert delayedarray.chunk_shape(arr) == (1, 200)
assert chunk_shape(arr) == (1, 200)
assert (delayedarray.to_dense_array(arr) == y.toarray()).all()

# Check that consecutive slicing works as expected.
Expand Down Expand Up @@ -95,7 +97,7 @@ def test_Hdf5CompressedSparseMatrix_dtype():

assert arr.shape == shape
assert arr.dtype == numpy.int16
assert delayedarray.chunk_shape(arr) == (55, 1)
assert chunk_shape(arr) == (55, 1)

as_dense = delayedarray.to_dense_array(arr)
assert (as_dense == y.toarray()).all()
Expand Down Expand Up @@ -136,4 +138,4 @@ def test_Hdf5CompressedSparseMatrix_to_sparse():
assert isinstance(_to_csc, scipy.sparse.csc_matrix)

_to_coo = delayedarray.to_scipy_sparse_matrix(arr, "coo")
assert isinstance(_to_coo, scipy.sparse.coo_matrix)
assert isinstance(_to_coo, scipy.sparse.coo_matrix)
8 changes: 5 additions & 3 deletions tests/test_Hdf5DenseArray.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import delayedarray
import tempfile

from utils import chunk_shape

__author__ = "jkanche"
__copyright__ = "jkanche"
__license__ = "MIT"
Expand All @@ -26,7 +28,7 @@ def test_Hdf5DenseArray_native():

assert arr.shape == y.shape
assert arr.dtype == y.dtype
assert delayedarray.chunk_shape(arr) == chunk_sizes
assert chunk_shape(arr) == chunk_sizes
assert (delayedarray.to_dense_array(arr) == y).all()
assert not delayedarray.is_masked(arr)

Expand Down Expand Up @@ -56,7 +58,7 @@ def test_Hdf5DenseArray_non_native():
actual_chunk_sizes = (*list(reversed(chunk_sizes)),)
assert arr.shape == actual_shape
assert arr.dtype == y.dtype
assert delayedarray.chunk_shape(arr) == actual_chunk_sizes
assert chunk_shape(arr) == actual_chunk_sizes
assert (delayedarray.to_dense_array(arr) == y.T).all()

# Check that the slicing works as expected.
Expand All @@ -83,7 +85,7 @@ def test_Hdf5DenseArray_new_type():

assert arr.shape == test_shape
assert arr.dtype == numpy.dtype("int32")
assert delayedarray.chunk_shape(arr) == chunk_sizes
assert chunk_shape(arr) == chunk_sizes
assert (delayedarray.to_dense_array(arr) == y.astype(numpy.int32)).all()


Expand Down
6 changes: 6 additions & 0 deletions tests/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import delayedarray


def chunk_shape(x):
    """
    Recover a chunk shape from the chunk grid of ``x``.

    Test helper that calls ``delayedarray.chunk_grid`` and collects the first
    boundary of the resulting grid along every dimension.

    Args:
        x: Any object accepted by ``delayedarray.chunk_grid``.

    Returns:
        Tuple with one entry per dimension of the grid, each being the first
        boundary along that dimension (presumably the end position of the
        first chunk, i.e. the chunk length - confirm against the grid class).
    """
    grid = delayedarray.chunk_grid(x)
    # One entry per dimension instead of a hard-coded pair, so arrays that
    # are not exactly 2-dimensional are reported correctly; the result for
    # 2-dimensional input is unchanged.
    return tuple(bounds[0] for bounds in grid.boundaries)

0 comments on commit 3fd21cb

Please sign in to comment.