-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: separate classes for sparse and dense formats
- Loading branch information
Showing
11 changed files
with
188 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,9 @@ | ||
# Changelog | ||
|
||
## Version 0.1 (development) | ||
## Version 0.0.1 | ||
|
||
- initial classes for H5 backed matrices | ||
|
||
## Version 0.0.3 | ||
|
||
- separate dense and sparse matrix classes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
from typing import Optional, Sequence, Tuple, Union | ||
|
||
import h5py | ||
|
||
from .utils import _check_indices, infer_h5_dataset | ||
|
||
__author__ = "jkanche" | ||
__copyright__ = "jkanche" | ||
__license__ = "MIT" | ||
|
||
|
||
class H5BackedDenseData:
    """H5 backed dense matrix or array store.

    The H5 file is opened read-only when the object is created and is closed
    when the object is garbage collected.

    Args:
        path (str): Path to the H5 file.
        group (str): Group inside the file that contains the matrix or array.
        order (str): Dense matrix representation, "C" for row-major (C-style)
            or "F" for column-major (Fortran-style) order. Any value other
            than "C" is handled as column-major. Defaults to "C".

    Raises:
        ValueError: If the dataset found at ``group`` is not a dense matrix.
    """

    def __init__(self, path: str, group: str, order: str = "C") -> None:
        """Initialize a H5 backed array.

        Args:
            path (str): Path to the H5 file.
            group (str): Group inside the file that contains the matrix or
                array.
            order (str): Dense matrix representation, "C" for row-major
                (C-style) or "F" for column-major (Fortran-style) order.
                Defaults to "C".

        Raises:
            ValueError: If the dataset found at ``group`` is not a dense
                matrix.
        """
        self._order = order
        self._h5file = h5py.File(path, mode="r")

        try:
            self._dataset = self._h5file[group]
            self._dataset_info = infer_h5_dataset(self._dataset)

            if self._dataset_info.format != "dense":
                raise ValueError("File does not contain a dense matrix")
        except Exception:
            # Do not keep the file handle open until garbage collection when
            # construction fails (missing group, non-dense dataset, ...).
            self._h5file.close()
            raise

    @property
    def shape(self) -> Tuple[int, int]:
        """Get shape of the dataset.

        For any order other than "C" the stored shape is reversed.

        Returns:
            Tuple[int, int]: number of rows by columns.
        """
        if self._order == "C":
            return self._dataset_info.shape

        return self._dataset_info.shape[::-1]

    @property
    def dtype(self) -> str:
        """Get type of values stored in the dataset.

        Returns:
            str: type of dataset, e.g. int8, float etc.
        """
        return self._dataset_info.dtype

    @property
    def mat_format(self) -> str:
        """Get dense matrix format.

        Either row-major (C-style) or column-major (Fortran-style) order, as
        passed to the constructor.

        Returns:
            str: matrix format.
        """
        return self._order

    def __getitem__(
        self,
        args: Union[
            slice,
            Sequence[int],
            Tuple[Union[slice, Sequence[int]], Optional[Union[slice, Sequence[int]]]],
        ],
    ):
        """Read a subset of the dataset.

        Args:
            args: Either a single row selection (``obj[rows]``) or a tuple
                ``(rows, columns)``. Each selection is a slice or a sequence
                of integer positions. When the column selection is missing or
                ``None``, all columns are selected.

        Returns:
            The result of indexing the underlying dataset. For any order
            other than "C" the dataset is indexed as ``[columns, rows]``, so
            the result is laid out in the dataset's storage orientation.

        Raises:
            ValueError: If no selection or more than two selections are given.
        """
        # A bare index such as ``obj[0:2]`` arrives as a non-tuple; treat it
        # as a rows-only selection instead of failing on ``len``.
        if not isinstance(args, tuple):
            args = (args,)

        if len(args) == 0:
            raise ValueError("Arguments must contain one slice")

        # Reject extra selections before doing any work; previously this
        # check sat in an unreachable ``elif`` and never fired.
        if len(args) > 2:
            raise ValueError("contains too many slices")

        row_indices = _check_indices(args[0])

        col_indices = None
        if len(args) == 2 and args[1] is not None:
            col_indices = _check_indices(args[1])

        if col_indices is None:
            # ``slice(None)`` selects every column; ``slice(0)`` would stop
            # at position 0 and select none.
            col_indices = slice(None)

        if self.mat_format == "C":
            return self._dataset[row_indices, col_indices]

        return self._dataset[col_indices, row_indices]

    # TODO: switch to weak refs at some point
    def __del__(self):
        """Close the underlying H5 file, if one was opened."""
        # ``_h5file`` is absent when ``__init__`` failed before opening the
        # file, so look it up defensively.
        h5file = getattr(self, "_h5file", None)
        if h5file is not None:
            h5file.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import numpy as np | ||
from filebackedarray import H5BackedDenseData | ||
|
||
__author__ = "jkanche" | ||
__copyright__ = "jkanche" | ||
__license__ = "MIT" | ||
|
||
|
||
def test_h5_dense_backed_C():
    """Open the row-major fixture and check its metadata and a 2x3 subset."""
    dense = H5BackedDenseData("tests/data/dense.h5", "dense_C")

    # Object construction and reported metadata.
    assert dense is not None
    assert isinstance(dense, H5BackedDenseData)
    assert dense.shape == (100, 100)
    assert dense.mat_format == "C"
    assert dense.dtype is not None

    # Rows 0-1 and columns 1-3 come back as an in-memory array.
    subset = dense[0:2, 1:4]
    assert isinstance(subset, np.ndarray)
    assert subset.shape == (2, 3)
|
||
|
||
def test_h5_dense_backed_F():
    """Open the column-major fixture and check its metadata and a subset."""
    dense = H5BackedDenseData("tests/data/dense_F.h5", "dense_F", order="F")

    # Object construction and reported metadata.
    assert dense is not None
    assert isinstance(dense, H5BackedDenseData)
    assert dense.shape == (3, 2)
    assert dense.mat_format == "F"

    # One row by two columns is requested; the expected array shape is (2, 1).
    subset = dense[0:1, 0:2]
    assert isinstance(subset, np.ndarray)
    assert subset.shape == (2, 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters