Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize compute_kda_ma for memory and speed #857

Merged
merged 23 commits into from
Jan 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions nimare/meta/cbma/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def _compute_weights(self, ma_values):
"""
return None

def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"):
def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps", return_type="sparse"):
"""Collect modeled activation maps from Estimator inputs.

Parameters
Expand All @@ -231,7 +231,7 @@ def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"):
ma_maps = self.kernel_transformer.transform(
self.inputs_[coords_key],
masker=self.masker,
return_type="sparse",
return_type=return_type,
)

return ma_maps
Expand Down
56 changes: 13 additions & 43 deletions nimare/meta/cbma/mkda.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ class MKDAChi2(PairwiseCBMAEstimator):

def __init__(
self,
kernel_transformer=MKDAKernel,
kernel_transformer=MKDAKernel(),
prior=0.5,
memory=Memory(location=None, verbose=0),
memory_level=0,
Expand Down Expand Up @@ -438,35 +438,21 @@ def _fit(self, dataset1, dataset2):
self.null_distributions_ = {}

# Generate MA maps and calculate count variables for first dataset
ma_maps1 = self._collect_ma_maps(
n_selected_active_voxels = self._collect_ma_maps(
maps_key="ma_maps1",
coords_key="coordinates1",
return_type="summary_array",
)
n_selected = ma_maps1.shape[0]
n_selected_active_voxels = ma_maps1.sum(axis=0)

if isinstance(n_selected_active_voxels, sparse._coo.core.COO):
# NOTE: This may not work correctly with a non-NiftiMasker.
mask_data = self.masker.mask_img.get_fdata().astype(bool)

# Indexing the sparse array is slow, perform masking in the dense array
n_selected_active_voxels = n_selected_active_voxels.todense().reshape(-1)
n_selected_active_voxels = n_selected_active_voxels[mask_data.reshape(-1)]

del ma_maps1
n_selected = self.dataset1.coordinates["id"].unique().shape[0]

# Generate MA maps and calculate count variables for second dataset
ma_maps2 = self._collect_ma_maps(
n_unselected_active_voxels = self._collect_ma_maps(
maps_key="ma_maps2",
coords_key="coordinates2",
return_type="summary_array",
)
n_unselected = ma_maps2.shape[0]
n_unselected_active_voxels = ma_maps2.sum(axis=0)
if isinstance(n_unselected_active_voxels, sparse._coo.core.COO):
n_unselected_active_voxels = n_unselected_active_voxels.todense().reshape(-1)
n_unselected_active_voxels = n_unselected_active_voxels[mask_data.reshape(-1)]

del ma_maps2
n_unselected = self.dataset2.coordinates["id"].unique().shape[0]

n_mappables = n_selected + n_unselected

Expand Down Expand Up @@ -587,33 +573,17 @@ def _run_fwe_permutation(self, iter_xyz1, iter_xyz2, iter_df1, iter_df2, conn, v
iter_df2[["x", "y", "z"]] = iter_xyz2

# Generate MA maps and calculate count variables for first dataset
temp_ma_maps1 = self.kernel_transformer.transform(
iter_df1, self.masker, return_type="sparse"
n_selected_active_voxels = self.kernel_transformer.transform(
iter_df1, self.masker, return_type="summary_array"
)
n_selected = temp_ma_maps1.shape[0]
n_selected_active_voxels = temp_ma_maps1.sum(axis=0)

if isinstance(n_selected_active_voxels, sparse._coo.core.COO):
# NOTE: This may not work correctly with a non-NiftiMasker.
mask_data = self.masker.mask_img.get_fdata().astype(bool)

# Indexing the sparse array is slow, perform masking in the dense array
n_selected_active_voxels = n_selected_active_voxels.todense().reshape(-1)
n_selected_active_voxels = n_selected_active_voxels[mask_data.reshape(-1)]

del temp_ma_maps1
n_selected = self.dataset1.coordinates["id"].unique().shape[0]

# Generate MA maps and calculate count variables for second dataset
temp_ma_maps2 = self.kernel_transformer.transform(
iter_df2, self.masker, return_type="sparse"
n_unselected_active_voxels = self.kernel_transformer.transform(
iter_df2, self.masker, return_type="summary_array"
)
n_unselected = temp_ma_maps2.shape[0]
n_unselected_active_voxels = temp_ma_maps2.sum(axis=0)
if isinstance(n_unselected_active_voxels, sparse._coo.core.COO):
n_unselected_active_voxels = n_unselected_active_voxels.todense().reshape(-1)
n_unselected_active_voxels = n_unselected_active_voxels[mask_data.reshape(-1)]

del temp_ma_maps2
n_unselected = self.dataset2.coordinates["id"].unique().shape[0]

# Currently unused conditional probabilities
# pAgF = n_selected_active_voxels / n_selected
Expand Down
60 changes: 48 additions & 12 deletions nimare/meta/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@
class KernelTransformer(NiMAREBase):
"""Base class for modeled activation-generating methods in :mod:`~nimare.meta.kernel`.

.. versionchanged:: 0.2.1

- Add return_type='summary_array' option to transform method.

.. versionchanged:: 0.0.13

- Remove "dataset" `return_type` option.
Expand All @@ -44,7 +48,7 @@ class KernelTransformer(NiMAREBase):

Notes
-----
All extra (non-ijk) parameters for a given kernel should be overrideable as
All extra (non-ijk) parameters for a given kernel should be overridable as
parameters to __init__, so we can access them with get_params() and also
apply them to datasets with missing data.
"""
Expand Down Expand Up @@ -96,7 +100,7 @@ def transform(self, dataset, masker=None, return_type="image"):
Mask to apply to MA maps. Required if ``dataset`` is a DataFrame.
If None (and ``dataset`` is a Dataset), the Dataset's masker attribute will be used.
Default is None.
return_type : {'sparse', 'array', 'image'}, optional
return_type : {'sparse', 'array', 'image', 'summary_array'}, optional
Whether to return a sparse matrix ('sparse'), a numpy array ('array'),
a list of niimgs ('image'), or a 1D summary array combined across
experiments ('summary_array').
Default is 'image'.
Expand All @@ -110,6 +114,8 @@ def transform(self, dataset, masker=None, return_type="image"):
equal to `shape` of the images.
If return_type is 'array', a 2D numpy array (C x V), where C is
contrast and V is voxel.
If return_type is 'summary_array', a 1D numpy array (V,) containing
a summary measure for each voxel, combined across experiments.
If return_type is 'image', a list of modeled activation images
(one for each of the Contrasts in the input dataset).

Expand All @@ -121,8 +127,10 @@ def transform(self, dataset, masker=None, return_type="image"):
Name of the corresponding column in the Dataset.images DataFrame.
If :meth:`_infer_names` is executed.
"""
if return_type not in ("sparse", "array", "image"):
raise ValueError('Argument "return_type" must be "image", "array", or "sparse".')
if return_type not in ("sparse", "array", "image", "summary_array"):
raise ValueError(
'Argument "return_type" must be "image", "array", "summary_array", or "sparse".'
)

if isinstance(dataset, pd.DataFrame):
assert (
Expand Down Expand Up @@ -169,9 +177,14 @@ def transform(self, dataset, masker=None, return_type="image"):
mask_data = mask.get_fdata().astype(dtype)

# Generate the MA maps
transformed_maps = self._cache(self._transform, func_memory_level=2)(mask, coordinates)
if return_type == "summary_array" or return_type == "sparse":
args = (mask, coordinates, return_type)
else:
args = (mask, coordinates)

if return_type == "sparse":
transformed_maps = self._cache(self._transform, func_memory_level=2)(*args)

if return_type == "sparse" or return_type == "summary_array":
return transformed_maps[0]

imgs = []
Expand All @@ -184,7 +197,7 @@ def transform(self, dataset, masker=None, return_type="image"):
imgs.append(img)
elif return_type == "image":
kernel_data *= mask_data
img = nib.Nifti1Image(kernel_data, mask.affine)
img = nib.Nifti1Image(kernel_data, mask.affine, dtype=kernel_data.dtype)
imgs.append(img)

del kernel_data, transformed_maps
Expand All @@ -194,7 +207,7 @@ def transform(self, dataset, masker=None, return_type="image"):
elif return_type == "image":
return imgs

def _transform(self, mask, coordinates):
def _transform(self, mask, coordinates, return_type="sparse"):
"""Apply the kernel's unique transformer.

Parameters
Expand All @@ -206,18 +219,27 @@ def _transform(self, mask, coordinates):
The DataFrame must have the following columns: "id", "i", "j", "k".
Additionally, individual kernels may require other columns
(e.g., "sample_size" for ALE).
return_type : {'sparse', 'summary_array'}, optional
Whether to return a 4D sparse matrix ('sparse') where each contrast map is
saved separately or a 1D numpy array ('summary_array') where the contrast maps
are combined.
Default is 'sparse'.

Returns
-------
transformed_maps : N-length list of (3D array, str) tuples or (4D array, 1D array) tuple
Transformed data, containing one element for each study.

- Case 1: A kernel that is not an (M)KDAKernel, with no memory limit.
- Case 1: A kernel that is not an (M)KDAKernel
Each list entry is composed of a 3D array (the MA map) and the study's ID.

- Case 2: (M)KDAKernel, with no memory limit.
- Case 2: (M)KDAKernel
There is a length-2 tuple with a 4D numpy array of the shape (N, X, Y, Z),
containing all of the MA maps, and a numpy array of shape (N,) with the study IDs.

- Case 3: A kernel with return_type='summary_array'.
The transformed data is a 1D numpy array of length X*Y*Z containing the
summary measure for each voxel, combined across studies.
"""
pass

Expand Down Expand Up @@ -277,7 +299,7 @@ def __init__(
self.sample_size = sample_size
super().__init__(memory=memory, memory_level=memory_level)

def _transform(self, mask, coordinates):
def _transform(self, mask, coordinates, return_type="sparse"):
ijks = coordinates[["i", "j", "k"]].values
exp_idx = coordinates["id"].values

Expand Down Expand Up @@ -344,6 +366,10 @@ def _generate_description(self):
class KDAKernel(KernelTransformer):
"""Generate KDA modeled activation images from coordinates.

.. versionchanged:: 0.2.1

- Add new parameter ``return_type`` to transform method.

.. versionchanged:: 0.0.13

- Add new parameter ``memory`` to cache modeled activation (MA) maps.
Expand Down Expand Up @@ -383,16 +409,25 @@ def __init__(
self.value = value
super().__init__(memory=memory, memory_level=memory_level)

def _transform(self, mask, coordinates):
def _transform(self, mask, coordinates, return_type="sparse"):
"""Return type can either be sparse or summary_array."""
ijks = coordinates[["i", "j", "k"]].values
exp_idx = coordinates["id"].values
if return_type == "sparse":
sum_across_studies = False
elif return_type == "summary_array":
sum_across_studies = True
else:
raise ValueError('Argument "return_type" must be "sparse" or "summary_array".')

transformed = compute_kda_ma(
mask,
ijks,
self.r,
self.value,
exp_idx,
sum_overlap=self._sum_overlap,
sum_across_studies=sum_across_studies,
)
exp_ids = np.unique(exp_idx)
return transformed, exp_ids
Expand Down Expand Up @@ -421,6 +456,7 @@ class MKDAKernel(KDAKernel):
.. versionchanged:: 0.2.1

- New parameters: ``memory`` and ``memory_level`` for memory caching.
- Add new parameter ``return_type`` to transform method.

.. versionchanged:: 0.0.13

Expand Down
Loading
Loading