diff --git a/.gitignore b/.gitignore index 4fdec28..9d3d023 100644 --- a/.gitignore +++ b/.gitignore @@ -165,3 +165,6 @@ r_package/msentropy/src/RcppExports.cpp ms_entropy/spectra/entropy_cython.c local/ ms_entropy/entropy_search/fast_flash_entropy_search_cpython.c +ms_entropy/entropy_search/entropy_search.csv +ms_entropy/entropy_search/entropy_search.xlsx +ms_entropy/entropy_search/Untitled-1.ipynb diff --git a/ms_entropy/entropy_search/__init__.py b/ms_entropy/entropy_search/__init__.py index b2a1318..58882db 100644 --- a/ms_entropy/entropy_search/__init__.py +++ b/ms_entropy/entropy_search/__init__.py @@ -1,3 +1,4 @@ from .flash_entropy_search import FlashEntropySearch from .flash_entropy_search_core import FlashEntropySearchCore -from .flash_entropy_search_core_low_memory import FlashEntropySearchCoreLowMemory \ No newline at end of file +from .flash_entropy_search_core_low_memory import FlashEntropySearchCoreLowMemory +from ..version import __version__ \ No newline at end of file diff --git a/ms_entropy/entropy_search/flash_entropy_search.py b/ms_entropy/entropy_search/flash_entropy_search.py index 1e82d49..7eca6a8 100644 --- a/ms_entropy/entropy_search/flash_entropy_search.py +++ b/ms_entropy/entropy_search/flash_entropy_search.py @@ -114,7 +114,7 @@ def hybrid_search(self, precursor_mz, peaks, ms2_tolerance_in_da, target="cpu", :return: The entropy similarity score for each spectrum in the library, a numpy array with shape (N,), N is the number of spectra in the library. """ - return self.entropy_search.search_hybrid(target=target, precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da) + return self.entropy_search.search_hybrid(target=target, precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da,**kwargs) def clean_spectrum_for_search( self, precursor_mz, peaks, precursor_ions_removal_da: float = 1.6, noise_threshold=0.01, min_ms2_difference_in_da: float = 0.05, max_peak_num: int = 0 @@ -157,6 +157,7 @@ def search( noise_threshold=0.01, min_ms2_difference_in_da: float = 0.05, max_peak_num: int = None, + **kwargs, ): """ Run the Flash entropy search for the query spectrum. @@ -195,18 +196,40 @@ def search( method = {method} result = {} + if "identity" in method: - result["identity_search"] = self.identity_search( - precursor_mz=precursor_mz, peaks=peaks, ms1_tolerance_in_da=ms1_tolerance_in_da, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target + tmp = self.identity_search( + precursor_mz=precursor_mz, peaks=peaks, ms1_tolerance_in_da=ms1_tolerance_in_da, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target,**kwargs ) + if len(tmp) == 1: + result["identity_search"] = tmp + else: + result["identity_search"] = tmp[0] + result["identity_MP"] = tmp[1] if "open" in method: - result["open_search"] = self.open_search(peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target) + tmp = self.open_search(peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target,**kwargs) + if len(tmp) == 1: + result["open_search"] = tmp + else: + result["open_search"] = tmp[0] + result["open_MP"] = tmp[1] + if "neutral_loss" in method: - result["neutral_loss_search"] = self.neutral_loss_search( - precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target + tmp = self.neutral_loss_search( + precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target,**kwargs ) + if len(tmp) == 1: + result["neutral_loss_search"] = tmp + else: + result["neutral_loss_search"] = tmp[0] + result["neutral_loss_MP"] = tmp[1] if "hybrid" in method: - result["hybrid_search"] = self.hybrid_search(precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target) + tmp = self.hybrid_search(precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target,**kwargs) + if len(tmp) == 1: + result["hybrid_search"] = tmp + else: + result["hybrid_search"] = tmp[0] + result["hybrid_MP"] = tmp[1] return result def build_index( diff --git a/ms_entropy/entropy_search/flash_entropy_search_core.py b/ms_entropy/entropy_search/flash_entropy_search_core.py index 91dbd62..c1d489a 100644 --- a/ms_entropy/entropy_search/flash_entropy_search_core.py +++ b/ms_entropy/entropy_search/flash_entropy_search_core.py @@ -188,7 +188,7 @@ def search( entropy_similarity[search_spectra_idx_max:] = 0 return entropy_similarity - def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_tolerance_in_da=0.02): + def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_tolerance_in_da=0.02, output_matched_peak_number=False): """ Perform the hybrid search for the MS/MS spectra. @@ -241,6 +241,8 @@ def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_toleran if target == "cpu": entropy_similarity = np.zeros(self.total_spectra_num, dtype=np.float32) + if output_matched_peak_number: + matched_peak_number = np.zeros(self.total_spectra_num, dtype=np.int32) # Go through all the peaks in the spectrum and calculate the entropy similarity for peak_idx, (mz, intensity) in enumerate(peaks): ############################################################### @@ -253,7 +255,8 @@ def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_toleran modified_value_product = self._score_peaks_with_cpu(intensity, all_ions_intensity[product_mz_idx_min:product_mz_idx_max]) entropy_similarity[modified_idx_product] += modified_value_product - + if output_matched_peak_number: + matched_peak_number[modified_idx_product] += 1 ############################################################### # Match the neutral loss ions mz_nl = precursor_mz - mz @@ -268,19 +271,32 @@ def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_toleran # Calculate the entropy similarity for this matched peak modified_idx_nl = all_nl_spec_idx[neutral_loss_mz_idx_min:neutral_loss_mz_idx_max] modified_value_nl = self._score_peaks_with_cpu(intensity, all_nl_intensity[neutral_loss_mz_idx_min:neutral_loss_mz_idx_max]) + if output_matched_peak_number: + matched_peak_number[modified_idx_nl] += 1 # Check if the neutral loss ion is already matched to other query peak as a product ion nl_matched_product_ion_idx = all_ions_idx_for_nl[neutral_loss_mz_idx_min:neutral_loss_mz_idx_max] s1 = np.searchsorted(product_peak_match_idx_min, nl_matched_product_ion_idx, side="right") s2 = np.searchsorted(product_peak_match_idx_max - 1, nl_matched_product_ion_idx, side="left") + modified_value_nl[s1 > s2] = 0 + + if output_matched_peak_number: + + matched_peak_number[modified_idx_nl][s1 > s2] += -1 # Check if this query peak is already matched to a product ion in the same library spectrum duplicate_idx_in_nl = self._remove_duplicate_with_cpu(modified_idx_product, modified_idx_nl, self.total_spectra_num) modified_value_nl[duplicate_idx_in_nl] = 0 - + if output_matched_peak_number: + matched_peak_number[duplicate_idx_in_nl] += -1 + # convert negative number to 0 + matched_peak_number[matched_peak_number < 0] = 0 entropy_similarity[modified_idx_nl] += modified_value_nl - return entropy_similarity + if output_matched_peak_number: + return entropy_similarity, matched_peak_number + else: + return entropy_similarity elif target == "gpu": import cupy as cp diff --git a/ms_entropy/version.py b/ms_entropy/version.py index 7b344ec..7bb021e 100644 --- a/ms_entropy/version.py +++ b/ms_entropy/version.py @@ -1 +1 @@ -__version__ = '1.1.2' +__version__ = '1.1.3' diff --git a/tests/test_entropy_search.py b/tests/test_entropy_search.py index 07868fe..d239220 100644 --- a/tests/test_entropy_search.py +++ b/tests/test_entropy_search.py @@ -33,10 +33,33 @@ def test_read_and_write(self): self.flash_entropy.read(path_test) def test_hybrid_search(self): - similarity = self.flash_entropy.hybrid_search( + similarity, matched_peaks = self.flash_entropy.hybrid_search( precursor_mz=self.query_spectrum["precursor_mz"], peaks=self.query_spectrum["peaks"], ms2_tolerance_in_da=0.02 ) np.testing.assert_almost_equal(similarity, [1.0, 0.22299, 0.66897, 0.66897], decimal=5) + similarity, matched_peaks = self.flash_entropy.hybrid_search( + precursor_mz=self.query_spectrum["precursor_mz"], + peaks=self.query_spectrum["peaks"], + ms2_tolerance_in_da=0.02, + output_matched_peak_number=True, + ) + np.testing.assert_almost_equal(similarity, [1.0, 0.22299, 0.66897, 0.66897], decimal=5) + np.testing.assert_almost_equal(matched_peaks, [4, 1, 3, 3], decimal=5) + + + def test_hybrid_search_MP(self): + similarity = self.flash_entropy.neutral_loss_search( + precursor_mz=self.query_spectrum["precursor_mz"], peaks=self.query_spectrum["peaks"], ms2_tolerance_in_da=0.02 + ) + np.testing.assert_almost_equal(similarity, [1.0, 0.0, 0.44598, 0.22299], decimal=5) + similarity, matched_peaks = self.flash_entropy.hybrid_search( + precursor_mz=self.query_spectrum["precursor_mz"], + peaks=self.query_spectrum["peaks"], + ms2_tolerance_in_da=0.02, + output_matched_peak_number=True, + ) + np.testing.assert_almost_equal(similarity, [1.0, 0.0, 0.44598, 0.22299], decimal=5) + np.testing.assert_almost_equal(matched_peaks, [4, 0, 2, 1], decimal=5) def test_neutral_loss_search(self): similarity = self.flash_entropy.neutral_loss_search(