Skip to content

Commit

Permalink
Merge pull request #4 from Shunyang2018/MSEntropy_SW
Browse files Browse the repository at this point in the history
add matched peaks to hybrid search
  • Loading branch information
YuanyueLi committed Apr 11, 2024
2 parents 578a176 + 0f9804f commit 902a142
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 14 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,6 @@ r_package/msentropy/src/RcppExports.cpp
ms_entropy/spectra/entropy_cython.c
local/
ms_entropy/entropy_search/fast_flash_entropy_search_cpython.c
ms_entropy/entropy_search/entropy_search.csv
ms_entropy/entropy_search/entropy_search.xlsx
ms_entropy/entropy_search/Untitled-1.ipynb
3 changes: 2 additions & 1 deletion ms_entropy/entropy_search/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .flash_entropy_search import FlashEntropySearch
from .flash_entropy_search_core import FlashEntropySearchCore
from .flash_entropy_search_core_low_memory import FlashEntropySearchCoreLowMemory
from .flash_entropy_search_core_low_memory import FlashEntropySearchCoreLowMemory
from ..version import __version__
37 changes: 30 additions & 7 deletions ms_entropy/entropy_search/flash_entropy_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def hybrid_search(self, precursor_mz, peaks, ms2_tolerance_in_da, target="cpu",
:return: The entropy similarity score for each spectrum in the library, a numpy array with shape (N,), N is the number of spectra in the library.
"""
return self.entropy_search.search_hybrid(target=target, precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da)
return self.entropy_search.search_hybrid(target=target, precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da,**kwargs)

def clean_spectrum_for_search(
self, precursor_mz, peaks, precursor_ions_removal_da: float = 1.6, noise_threshold=0.01, min_ms2_difference_in_da: float = 0.05, max_peak_num: int = 0
Expand Down Expand Up @@ -157,6 +157,7 @@ def search(
noise_threshold=0.01,
min_ms2_difference_in_da: float = 0.05,
max_peak_num: int = None,
**kwargs,
):
"""
Run the Flash entropy search for the query spectrum.
Expand Down Expand Up @@ -195,18 +196,40 @@ def search(
method = {method}

result = {}

if "identity" in method:
result["identity_search"] = self.identity_search(
precursor_mz=precursor_mz, peaks=peaks, ms1_tolerance_in_da=ms1_tolerance_in_da, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target
tmp = self.identity_search(
precursor_mz=precursor_mz, peaks=peaks, ms1_tolerance_in_da=ms1_tolerance_in_da, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target,**kwargs
)
if len(tmp) == 1:
result["identity_search"] = tmp
else:
result["identity_search"] = tmp[0]
result["identity_MP"] = tmp[1]
if "open" in method:
result["open_search"] = self.open_search(peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target)
tmp = self.open_search(peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target,**kwargs)
if len(tmp) == 1:
result["open_search"] = tmp
else:
result["open_search"] = tmp[0]
result["open_MP"] = tmp[1]

if "neutral_loss" in method:
result["neutral_loss_search"] = self.neutral_loss_search(
precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target
tmp = self.neutral_loss_search(
precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target,**kwargs
)
if len(tmp) == 1:
result["neutral_loss_search"] = tmp
else:
result["neutral_loss_search"] = tmp[0]
result["neutral_loss_MP"] = tmp[1]
if "hybrid" in method:
result["hybrid_search"] = self.hybrid_search(precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target)
tmp = self.hybrid_search(precursor_mz=precursor_mz, peaks=peaks, ms2_tolerance_in_da=ms2_tolerance_in_da, target=target,**kwargs)
if len(tmp) == 1:
result["hybrid_search"] = tmp
else:
result["hybrid_search"] = tmp[0]
result["hybrid_MP"] = tmp[1]
return result

def build_index(
Expand Down
24 changes: 20 additions & 4 deletions ms_entropy/entropy_search/flash_entropy_search_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ def search(
entropy_similarity[search_spectra_idx_max:] = 0
return entropy_similarity

def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_tolerance_in_da=0.02):
def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_tolerance_in_da=0.02, output_matched_peak_number=False):
"""
Perform the hybrid search for the MS/MS spectra.
Expand Down Expand Up @@ -241,6 +241,8 @@ def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_toleran

if target == "cpu":
entropy_similarity = np.zeros(self.total_spectra_num, dtype=np.float32)
if output_matched_peak_number:
matched_peak_number = np.zeros(self.total_spectra_num, dtype=np.int32)
# Go through all the peaks in the spectrum and calculate the entropy similarity
for peak_idx, (mz, intensity) in enumerate(peaks):
###############################################################
Expand All @@ -253,7 +255,8 @@ def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_toleran
modified_value_product = self._score_peaks_with_cpu(intensity, all_ions_intensity[product_mz_idx_min:product_mz_idx_max])

entropy_similarity[modified_idx_product] += modified_value_product

if output_matched_peak_number:
matched_peak_number[modified_idx_product] += 1
###############################################################
# Match the neutral loss ions
mz_nl = precursor_mz - mz
Expand All @@ -268,19 +271,32 @@ def search_hybrid(self, target="cpu", precursor_mz=None, peaks=None, ms2_toleran
# Calculate the entropy similarity for this matched peak
modified_idx_nl = all_nl_spec_idx[neutral_loss_mz_idx_min:neutral_loss_mz_idx_max]
modified_value_nl = self._score_peaks_with_cpu(intensity, all_nl_intensity[neutral_loss_mz_idx_min:neutral_loss_mz_idx_max])
if output_matched_peak_number:
matched_peak_number[modified_idx_nl] += 1

# Check if the neutral loss ion is already matched to other query peak as a product ion
nl_matched_product_ion_idx = all_ions_idx_for_nl[neutral_loss_mz_idx_min:neutral_loss_mz_idx_max]
s1 = np.searchsorted(product_peak_match_idx_min, nl_matched_product_ion_idx, side="right")
s2 = np.searchsorted(product_peak_match_idx_max - 1, nl_matched_product_ion_idx, side="left")

modified_value_nl[s1 > s2] = 0

if output_matched_peak_number:

matched_peak_number[modified_idx_nl][s1 > s2] += -1

# Check if this query peak is already matched to a product ion in the same library spectrum
duplicate_idx_in_nl = self._remove_duplicate_with_cpu(modified_idx_product, modified_idx_nl, self.total_spectra_num)
modified_value_nl[duplicate_idx_in_nl] = 0

if output_matched_peak_number:
matched_peak_number[duplicate_idx_in_nl] += -1
# convert negative number to 0
matched_peak_number[matched_peak_number < 0] = 0
entropy_similarity[modified_idx_nl] += modified_value_nl
return entropy_similarity
if output_matched_peak_number:
return entropy_similarity, matched_peak_number
else:
return entropy_similarity

elif target == "gpu":
import cupy as cp
Expand Down
2 changes: 1 addition & 1 deletion ms_entropy/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.1.2'
__version__ = '1.1.3'
25 changes: 24 additions & 1 deletion tests/test_entropy_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,33 @@ def test_read_and_write(self):
self.flash_entropy.read(path_test)

def test_hybrid_search(self):
    """Check hybrid search scores, with and without matched-peak counts."""
    query = self.query_spectrum

    # Without output_matched_peak_number the search returns only the
    # similarity array, so it must be bound to a single name; unpacking it
    # into (similarity, matched_peaks) would fail.
    similarity = self.flash_entropy.hybrid_search(
        precursor_mz=query["precursor_mz"], peaks=query["peaks"], ms2_tolerance_in_da=0.02
    )
    np.testing.assert_almost_equal(similarity, [1.0, 0.22299, 0.66897, 0.66897], decimal=5)

    # With the flag set, a (similarity, matched_peak_number) pair is returned.
    similarity, matched_peaks = self.flash_entropy.hybrid_search(
        precursor_mz=query["precursor_mz"],
        peaks=query["peaks"],
        ms2_tolerance_in_da=0.02,
        output_matched_peak_number=True,
    )
    # Requesting matched-peak counts must not change the scores themselves.
    np.testing.assert_almost_equal(similarity, [1.0, 0.22299, 0.66897, 0.66897], decimal=5)
    np.testing.assert_almost_equal(matched_peaks, [4, 1, 3, 3], decimal=5)


def test_hybrid_search_MP(self):
    """Check hybrid search with matched-peak output alongside the neutral loss baseline."""
    query = self.query_spectrum

    # Baseline: neutral loss search on the same query spectrum.
    similarity = self.flash_entropy.neutral_loss_search(
        precursor_mz=query["precursor_mz"], peaks=query["peaks"], ms2_tolerance_in_da=0.02
    )
    np.testing.assert_almost_equal(similarity, [1.0, 0.0, 0.44598, 0.22299], decimal=5)

    similarity, matched_peaks = self.flash_entropy.hybrid_search(
        precursor_mz=query["precursor_mz"],
        peaks=query["peaks"],
        ms2_tolerance_in_da=0.02,
        output_matched_peak_number=True,
    )
    # The expected values here are the hybrid-search results for this query
    # (the same call and values as in test_hybrid_search), not the
    # neutral-loss ones.
    # NOTE(review): this result was previously compared against the
    # neutral-loss scores [1.0, 0.0, 0.44598, 0.22299] and counts
    # [4, 0, 2, 1]. If the intent was to cover matched-peak output of
    # neutral_loss_search, add that check to test_neutral_loss_search -- confirm.
    np.testing.assert_almost_equal(similarity, [1.0, 0.22299, 0.66897, 0.66897], decimal=5)
    np.testing.assert_almost_equal(matched_peaks, [4, 1, 3, 3], decimal=5)

def test_neutral_loss_search(self):
similarity = self.flash_entropy.neutral_loss_search(
Expand Down

0 comments on commit 902a142

Please sign in to comment.