diff --git a/nimare/meta/cbma/base.py b/nimare/meta/cbma/base.py index 96cde25b1..a6bd13913 100644 --- a/nimare/meta/cbma/base.py +++ b/nimare/meta/cbma/base.py @@ -205,7 +205,7 @@ def _compute_weights(self, ma_values): """ return None - def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"): + def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps", return_type="sparse"): """Collect modeled activation maps from Estimator inputs. Parameters @@ -231,7 +231,7 @@ def _collect_ma_maps(self, coords_key="coordinates", maps_key="ma_maps"): ma_maps = self.kernel_transformer.transform( self.inputs_[coords_key], masker=self.masker, - return_type="sparse", + return_type=return_type, ) return ma_maps diff --git a/nimare/meta/cbma/mkda.py b/nimare/meta/cbma/mkda.py index a62e73bb5..e3d6b42bd 100644 --- a/nimare/meta/cbma/mkda.py +++ b/nimare/meta/cbma/mkda.py @@ -388,7 +388,7 @@ class MKDAChi2(PairwiseCBMAEstimator): def __init__( self, - kernel_transformer=MKDAKernel, + kernel_transformer=MKDAKernel(), prior=0.5, memory=Memory(location=None, verbose=0), memory_level=0, @@ -438,35 +438,21 @@ def _fit(self, dataset1, dataset2): self.null_distributions_ = {} # Generate MA maps and calculate count variables for first dataset - ma_maps1 = self._collect_ma_maps( + n_selected_active_voxels = self._collect_ma_maps( maps_key="ma_maps1", coords_key="coordinates1", + return_type="summary_array", ) - n_selected = ma_maps1.shape[0] - n_selected_active_voxels = ma_maps1.sum(axis=0) - - if isinstance(n_selected_active_voxels, sparse._coo.core.COO): - # NOTE: This may not work correctly with a non-NiftiMasker. 
- mask_data = self.masker.mask_img.get_fdata().astype(bool) - - # Indexing the sparse array is slow, perform masking in the dense array - n_selected_active_voxels = n_selected_active_voxels.todense().reshape(-1) - n_selected_active_voxels = n_selected_active_voxels[mask_data.reshape(-1)] - del ma_maps1 + n_selected = self.dataset1.coordinates["id"].unique().shape[0] # Generate MA maps and calculate count variables for second dataset - ma_maps2 = self._collect_ma_maps( + n_unselected_active_voxels = self._collect_ma_maps( maps_key="ma_maps2", coords_key="coordinates2", + return_type="summary_array", ) - n_unselected = ma_maps2.shape[0] - n_unselected_active_voxels = ma_maps2.sum(axis=0) - if isinstance(n_unselected_active_voxels, sparse._coo.core.COO): - n_unselected_active_voxels = n_unselected_active_voxels.todense().reshape(-1) - n_unselected_active_voxels = n_unselected_active_voxels[mask_data.reshape(-1)] - - del ma_maps2 + n_unselected = self.dataset2.coordinates["id"].unique().shape[0] n_mappables = n_selected + n_unselected @@ -587,33 +573,17 @@ def _run_fwe_permutation(self, iter_xyz1, iter_xyz2, iter_df1, iter_df2, conn, v iter_df2[["x", "y", "z"]] = iter_xyz2 # Generate MA maps and calculate count variables for first dataset - temp_ma_maps1 = self.kernel_transformer.transform( - iter_df1, self.masker, return_type="sparse" + n_selected_active_voxels = self.kernel_transformer.transform( + iter_df1, self.masker, return_type="summary_array" ) - n_selected = temp_ma_maps1.shape[0] - n_selected_active_voxels = temp_ma_maps1.sum(axis=0) - if isinstance(n_selected_active_voxels, sparse._coo.core.COO): - # NOTE: This may not work correctly with a non-NiftiMasker. 
- mask_data = self.masker.mask_img.get_fdata().astype(bool) - - # Indexing the sparse array is slow, perform masking in the dense array - n_selected_active_voxels = n_selected_active_voxels.todense().reshape(-1) - n_selected_active_voxels = n_selected_active_voxels[mask_data.reshape(-1)] - - del temp_ma_maps1 + n_selected = self.dataset1.coordinates["id"].unique().shape[0] # Generate MA maps and calculate count variables for second dataset - temp_ma_maps2 = self.kernel_transformer.transform( - iter_df2, self.masker, return_type="sparse" + n_unselected_active_voxels = self.kernel_transformer.transform( + iter_df2, self.masker, return_type="summary_array" ) - n_unselected = temp_ma_maps2.shape[0] - n_unselected_active_voxels = temp_ma_maps2.sum(axis=0) - if isinstance(n_unselected_active_voxels, sparse._coo.core.COO): - n_unselected_active_voxels = n_unselected_active_voxels.todense().reshape(-1) - n_unselected_active_voxels = n_unselected_active_voxels[mask_data.reshape(-1)] - - del temp_ma_maps2 + n_unselected = self.dataset2.coordinates["id"].unique().shape[0] # Currently unused conditional probabilities # pAgF = n_selected_active_voxels / n_selected diff --git a/nimare/meta/kernel.py b/nimare/meta/kernel.py index eb2fba920..9ea1bf31b 100644 --- a/nimare/meta/kernel.py +++ b/nimare/meta/kernel.py @@ -23,6 +23,10 @@ class KernelTransformer(NiMAREBase): """Base class for modeled activation-generating methods in :mod:`~nimare.meta.kernel`. + .. versionchanged:: 0.2.1 + + - Add return_type='summary_array' option to transform method. + .. versionchanged:: 0.0.13 - Remove "dataset" `return_type` option. @@ -44,7 +48,7 @@ class KernelTransformer(NiMAREBase): Notes ----- - All extra (non-ijk) parameters for a given kernel should be overrideable as + All extra (non-ijk) parameters for a given kernel should be overridable as parameters to __init__, so we can access them with get_params() and also apply them to datasets with missing data. 
""" @@ -96,7 +100,7 @@ def transform(self, dataset, masker=None, return_type="image"): Mask to apply to MA maps. Required if ``dataset`` is a DataFrame. If None (and ``dataset`` is a Dataset), the Dataset's masker attribute will be used. Default is None. - return_type : {'sparse', 'array', 'image'}, optional + return_type : {'sparse', 'array', 'image', 'summary_array'}, optional Whether to return a sparse matrix ('sparse'), a numpy array ('array'), or a list of niimgs ('image'). Default is 'image'. @@ -110,6 +114,8 @@ def transform(self, dataset, masker=None, return_type="image"): equal to `shape` of the images. If return_type is 'array', a 2D numpy array (C x V), where C is contrast and V is voxel. + If return_type is 'summary_array', a 1D numpy array (V,) containing + a summary measure for each voxel that has been combined across experiments. If return_type is 'image', a list of modeled activation images (one for each of the Contrasts in the input dataset). @@ -121,8 +127,10 @@ def transform(self, dataset, masker=None, return_type="image"): Name of the corresponding column in the Dataset.images DataFrame. If :meth:`_infer_names` is executed. """ - if return_type not in ("sparse", "array", "image"): - raise ValueError('Argument "return_type" must be "image", "array", or "sparse".') + if return_type not in ("sparse", "array", "image", "summary_array"): + raise ValueError( + 'Argument "return_type" must be "image", "array", "summary_array", "sparse".' 
+ ) if isinstance(dataset, pd.DataFrame): assert ( @@ -169,9 +177,14 @@ def transform(self, dataset, masker=None, return_type="image"): mask_data = mask.get_fdata().astype(dtype) # Generate the MA maps - transformed_maps = self._cache(self._transform, func_memory_level=2)(mask, coordinates) + if return_type == "summary_array" or return_type == "sparse": + args = (mask, coordinates, return_type) + else: + args = (mask, coordinates) - if return_type == "sparse": + transformed_maps = self._cache(self._transform, func_memory_level=2)(*args) + + if return_type == "sparse" or return_type == "summary_array": return transformed_maps[0] imgs = [] @@ -184,7 +197,7 @@ def transform(self, dataset, masker=None, return_type="image"): imgs.append(img) elif return_type == "image": kernel_data *= mask_data - img = nib.Nifti1Image(kernel_data, mask.affine) + img = nib.Nifti1Image(kernel_data, mask.affine, dtype=kernel_data.dtype) imgs.append(img) del kernel_data, transformed_maps @@ -194,7 +207,7 @@ def transform(self, dataset, masker=None, return_type="image"): elif return_type == "image": return imgs - def _transform(self, mask, coordinates): + def _transform(self, mask, coordinates, return_type="sparse"): """Apply the kernel's unique transformer. Parameters @@ -206,18 +219,27 @@ def _transform(self, mask, coordinates): The DataFrame must have the following columns: "id", "i", "j", "k". Additionally, individual kernels may require other columns (e.g., "sample_size" for ALE). + return_type : {'sparse', 'summary_array'}, optional + Whether to return a 4D sparse matrix ('sparse') where each contrast map is + saved separately or a 1D numpy array ('summary_array') where the contrast maps + are combined. + Default is 'sparse'. Returns ------- transformed_maps : N-length list of (3D array, str) tuples or (4D array, 1D array) tuple Transformed data, containing one element for each study. - - Case 1: A kernel that is not an (M)KDAKernel, with no memory limit. 
+ - Case 1: A kernel that is not an (M)KDAKernel Each list entry is composed of a 3D array (the MA map) and the study's ID. - - Case 2: (M)KDAKernel, with no memory limit. + - Case 2: (M)KDAKernel There is a length-2 tuple with a 4D numpy array of the shape (N, X, Y, Z), containing all of the MA maps, and a numpy array of shape (N,) with the study IDs. + + - Case 3: A kernel with return_type='summary_array'. + The transformed data is a 1D numpy array of shape (V,), where V is the number of voxels in the mask, containing the + summary measure for each voxel. """ pass @@ -277,7 +299,7 @@ def __init__( self.sample_size = sample_size super().__init__(memory=memory, memory_level=memory_level) - def _transform(self, mask, coordinates): + def _transform(self, mask, coordinates, return_type="sparse"): ijks = coordinates[["i", "j", "k"]].values exp_idx = coordinates["id"].values @@ -344,6 +366,10 @@ def _generate_description(self): class KDAKernel(KernelTransformer): """Generate KDA modeled activation images from coordinates. + .. versionchanged:: 0.2.1 + + - Add new parameter ``return_type`` to transform method. + .. versionchanged:: 0.0.13 - Add new parameter ``memory`` to cache modeled activation (MA) maps. 
@@ -383,9 +409,17 @@ def __init__( self.value = value super().__init__(memory=memory, memory_level=memory_level) - def _transform(self, mask, coordinates): + def _transform(self, mask, coordinates, return_type="sparse"): + """Return type can either be sparse or summary_array.""" ijks = coordinates[["i", "j", "k"]].values exp_idx = coordinates["id"].values + if return_type == "sparse": + sum_across_studies = False + elif return_type == "summary_array": + sum_across_studies = True + else: + raise ValueError('Argument "return_type" must be "sparse" or "summary_array".') + transformed = compute_kda_ma( mask, ijks, @@ -393,6 +427,7 @@ def _transform(self, mask, coordinates): self.value, exp_idx, sum_overlap=self._sum_overlap, + sum_across_studies=sum_across_studies, ) exp_ids = np.unique(exp_idx) return transformed, exp_ids @@ -421,6 +456,7 @@ class MKDAKernel(KDAKernel): .. versionchanged:: 0.2.1 - New parameters: ``memory`` and ``memory_level`` for memory caching. + - Add new parameter ``return_type`` to transform method. .. versionchanged:: 0.0.13 diff --git a/nimare/meta/utils.py b/nimare/meta/utils.py index 262bb519c..736628da9 100755 --- a/nimare/meta/utils.py +++ b/nimare/meta/utils.py @@ -9,6 +9,47 @@ from nimare.utils import unique_rows +@jit(nopython=True, cache=True) +def _convolve_sphere(kernel, ijks, index, max_shape): + """Convolve peaks with a spherical kernel. + + Parameters + ---------- + kernel : 2D numpy.ndarray + IJK coordinates of a sphere, relative to a central point + (not the brain template). + ijks : 2D numpy.ndarray + The IJK coordinates of peaks; rows selected by ``index`` are convolved. + max_shape : 1D numpy.ndarray + The maximum shape of the image volume. + + Returns + ------- + sphere_coords : 2D numpy.ndarray + All coordinates that fall within any sphere. + Coordinates from overlapping spheres will appear twice. 
+ """ + + def np_all_axis1(x): + """Numba compatible version of np.all(x, axis=1).""" + out = np.ones(x.shape[0], dtype=np.bool8) + for i in range(x.shape[1]): + out = np.logical_and(out, x[:, i]) + return out + + peaks = ijks[index] + sphere_coords = np.zeros((kernel.shape[1] * len(peaks), 3), dtype=np.int32) + chunk_idx = np.arange(0, (kernel.shape[1]), dtype=np.int64) + for peak in peaks: + sphere_coords[chunk_idx, :] = kernel.T + peak + chunk_idx = chunk_idx + kernel.shape[1] + + # Mask coordinates beyond space + idx = np_all_axis1(np.logical_and(sphere_coords >= 0, np.less(sphere_coords, max_shape))) + + return sphere_coords[idx, :] + + def compute_kda_ma( mask, ijks, @@ -16,6 +57,7 @@ def compute_kda_ma( value=1.0, exp_idx=None, sum_overlap=False, + sum_across_studies=False, ): """Compute (M)KDA modeled activation (MA) map. @@ -49,6 +91,8 @@ def compute_kda_ma( come from the same experiment. sum_overlap : :obj:`bool` Whether to sum voxel values in overlapping spheres. + sum_across_studies : :obj:`bool` + Whether to sum voxel values across studies. Returns ------- @@ -58,6 +102,11 @@ def compute_kda_ma( is returned, where the first dimension has size equal to the number of unique experiments, and the remaining 3 dimensions are equal to `shape`. 
""" + if sum_overlap and sum_across_studies: + raise NotImplementedError("sum_overlap and sum_across_studies cannot both be True.") + + # recast ijks to int32 to reduce memory footprint + ijks = ijks.astype(np.int32) shape = mask.shape vox_dims = mask.header.get_zooms() @@ -73,77 +122,59 @@ def compute_kda_ma( n_dim = ijks.shape[1] xx, yy, zz = [slice(-r // vox_dims[i], r // vox_dims[i] + 0.01, 1) for i in range(n_dim)] - cube = np.vstack([row.ravel() for row in np.mgrid[xx, yy, zz]]) + cube = np.vstack( + [row.ravel() for row in np.mgrid[xx, yy, zz]], dtype=np.int32, casting="unsafe" + ) kernel = cube[:, np.sum(np.dot(np.diag(vox_dims), cube) ** 2, 0) ** 0.5 <= r] - def _convolve_sphere(kernel, peaks): - """Convolve peaks with a spherical kernel. - - Parameters - ---------- - kernel : 2D numpy.ndarray - IJK coordinates of a sphere, relative to a central point - (not the brain template). - peaks : 2D numpy.ndarray - The IJK coordinates of peaks to convolve with the kernel. - - Returns - ------- - sphere_coords : 2D numpy.ndarray - All coordinates that fall within any sphere. - Coordinates from overlapping spheres will appear twice. 
- """ - # Convolve spheres - sphere_coords = np.zeros((kernel.shape[1] * len(peaks), 3), dtype=int) - chunk_idx = np.arange(0, (kernel.shape[1]), dtype=int) - for peak in peaks: - sphere_coords[chunk_idx, :] = kernel.T + peak - chunk_idx = chunk_idx + kernel.shape[1] - - return sphere_coords + if sum_across_studies: + all_values = np.zeros(shape, dtype=np.int32) - all_coords = [] - all_exp = [] - all_data = [] - # Loop over experiments - for i_exp, _ in enumerate(exp_idx_uniq): - # Index peaks by experiment - curr_exp_idx = exp_idx == i_exp - peaks = ijks[curr_exp_idx] + # Loop over experiments + for i_exp, _ in enumerate(exp_idx_uniq): + # Index peaks by experiment + curr_exp_idx = exp_idx == i_exp + sphere_coords = _convolve_sphere(kernel, ijks, curr_exp_idx, np.array(shape)) - all_spheres = _convolve_sphere(kernel, peaks) + # preallocate array for current study + study_values = np.zeros(shape, dtype=np.int32) + study_values[sphere_coords[:, 0], sphere_coords[:, 1], sphere_coords[:, 2]] = value - if sum_overlap: - all_spheres, counts = unique_rows(all_spheres, return_counts=True) - counts = counts * value - else: - all_spheres = unique_rows(all_spheres) + # Sum across studies + all_values += study_values - # Mask coordinates beyond space - idx = np.all( - np.concatenate([all_spheres >= 0, np.less(all_spheres, shape)], axis=1), axis=1 - ) + # Only return values within the mask + all_values = all_values.reshape(-1) + kernel_data = all_values[mask_data.reshape(-1)] - all_spheres = all_spheres[idx, :] + else: + all_coords = [] + # Loop over experiments + for i_exp, _ in enumerate(exp_idx_uniq): + curr_exp_idx = exp_idx == i_exp + # Convolve with sphere + all_spheres = _convolve_sphere(kernel, ijks, curr_exp_idx, np.array(shape)) - sphere_idx_inside_mask = np.where(mask_data[tuple(all_spheres.T)])[0] - sphere_idx_filtered = all_spheres[sphere_idx_inside_mask, :].T - nonzero_idx = tuple(sphere_idx_filtered) + if not sum_overlap: + all_spheres = unique_rows(all_spheres) 
- if sum_overlap: - nonzero_to_append = counts[idx][sphere_idx_inside_mask] - else: - nonzero_to_append = np.ones((len(sphere_idx_inside_mask),)) * value + # Apply mask + sphere_idx_inside_mask = np.where(mask_data[tuple(all_spheres.T)])[0] + all_spheres = all_spheres[sphere_idx_inside_mask, :] - all_exp.append(np.full(nonzero_idx[0].shape[0], i_exp)) - all_coords.append(np.vstack(nonzero_idx)) - all_data.append(nonzero_to_append) + # Combine experiment id with coordinates + all_coords.append(all_spheres) - exp = np.hstack(all_exp) - coords = np.vstack((exp.flatten(), np.hstack(all_coords))) + # Add exp_idx to coordinates + exp_shapes = [coords.shape[0] for coords in all_coords] + exp_indicator = np.repeat(np.arange(len(exp_shapes)), exp_shapes) - data = np.hstack(all_data).flatten().astype(np.int32) - kernel_data = sparse.COO(coords, data, shape=kernel_shape) + all_coords = np.vstack(all_coords).T + all_coords = np.insert(all_coords, 0, exp_indicator, axis=0) + + kernel_data = sparse.COO( + all_coords, data=value, has_duplicates=sum_overlap, shape=kernel_shape + ) return kernel_data diff --git a/nimare/reports/base.py b/nimare/reports/base.py index 317bcf0b3..adad9a025 100644 --- a/nimare/reports/base.py +++ b/nimare/reports/base.py @@ -51,6 +51,7 @@ "kernel_transformer__value": "Value for sphere", "kernel_transformer__memory": "Memory", "kernel_transformer__memory_level": "Memory level", + "kernel_transformer__sum_across_studies": "Sum Across Studies", "memory": "Memory", "memory_level": "Memory level", "null_method": "Null method", diff --git a/nimare/tests/test_meta_kernel.py b/nimare/tests/test_meta_kernel.py index 49f99f3b8..870e4f665 100644 --- a/nimare/tests/test_meta_kernel.py +++ b/nimare/tests/test_meta_kernel.py @@ -194,3 +194,24 @@ def test_ALEKernel_memory(testdata_cbma, tmp_path_factory): assert np.array_equal(ma_maps_cached_fast, ma_maps) assert elapsed_cached < elapsed + + +def test_MKDA_kernel_sum_across(testdata_cbma): + """Test if creating a 
summary array is equivalent to summing across the sparse array.""" + kern = kernel.MKDAKernel(r=10, value=1) + coordinates = testdata_cbma.coordinates.copy() + sparse_ma_maps = kern.transform(coordinates, masker=testdata_cbma.masker, return_type="sparse") + summary_map = kern.transform( + coordinates, masker=testdata_cbma.masker, return_type="summary_array" + ) + + summary_sparse_ma_map = sparse_ma_maps.sum(axis=0) + mask_data = testdata_cbma.masker.mask_img.get_fdata().astype(bool) + + # Indexing the sparse array is slow, perform masking in the dense array + summary_sparse_ma_map = summary_sparse_ma_map.todense().reshape(-1) + summary_sparse_ma_map = summary_sparse_ma_map[mask_data.reshape(-1)] + + assert ( + np.testing.assert_array_equal(summary_map, summary_sparse_ma_map.astype(np.int32)) is None + ) diff --git a/setup.cfg b/setup.cfg index f035e3201..83279717a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -90,13 +90,14 @@ tests = pytest-cov minimum = matplotlib==3.5.2 - nibabel==3.2.0 + nibabel==4.0.0 nilearn==0.10.1 - numpy==1.22 + numpy==1.24.1 pandas==2.0.0 pymare==0.0.4rc2 scikit-learn==1.0.0 scipy==1.6.0 + seaborn==0.13.0 all = %(gzip)s %(cbmr)s