import math import operator import warnings from typing import Any, Dict, Iterable, List, Optional, Tuple, Union import numba as nb import numpy as np from rdkit import Chem try: from pyteomics import cmass as mass except ImportError: from pyteomics import mass from spectrum_utils import utils _aa_mass = mass.std_aa_mass.copy() def static_modification(amino_acid: str, mass_diff: float) -> None: """ Globally modify the monoisotopic mass of an amino acid to set a static modification. Parameters ---------- amino_acid : str The amino acid whose monoisotopic mass is modified. mass_diff : float The *mass difference* to be added to the amino acid's original monoisotopic mass. """ global _aa_mass _aa_mass[amino_acid] += mass_diff def reset_modifications() -> None: """ Undo all static modifications and reset to the standard amino acid monoisotopic masses. """ global _aa_mass _aa_mass = mass.std_aa_mass.copy() class FragmentAnnotation: """ Class representing a general fragment ion annotation. """ def __init__(self, charge: int, calc_mz: float, annotation: str = None)\ -> None: """ Instantiate a new `FragmentAnnotation`. Parameters ---------- charge : int The fragment ion charge if known, None otherwise. calc_mz : float The theoretical m/z value of the fragment. annotation : str The fragment's annotation string. """ self.ion_type = 'unknown' self.charge = charge self.calc_mz = calc_mz self.annotation = annotation def _charge_to_str(self): """ Convert a numeric charge to a string representation. """ if self.charge is None: return 'unknown' elif self.charge > 0: return '+' * self.charge elif self.charge < 0: return '-' * -self.charge else: return 'undefined' def __repr__(self) -> str: return f"FragmentAnnotation(annotation='{self.annotation}', " \ f"charge={self._charge_to_str()}, mz={self.calc_mz})" def __str__(self) -> str: return self.annotation def __eq__(self, other: Any) -> bool: if not isinstance(other, FragmentAnnotation): return False else: return self.annotation == other.annotation class PeptideFragmentAnnotation(FragmentAnnotation): """ Class representing a peptide fragment ion annotation. """ def __init__(self, charge: int, calc_mz: float, ion_type: str, ion_index: int) -> None: """ Instantiate a new `PeptideFragmentAnnotation`. Parameters ---------- charge : int The peptide fragment ion charge. calc_mz : float The theoretical m/z value of the peptide fragment. ion_type : {'a', 'b', 'c', 'x', 'y', 'z'} The peptide fragment ion type. ion_index : int The peptide fragment ion index. """ super().__init__(charge, calc_mz) if ion_type not in 'abcxyz': raise ValueError(f'Unknown ion type: {ion_type}') self.ion_type = ion_type self.ion_index = ion_index self.annotation = f'{self.ion_type}{self.ion_index}' \ f'{self._charge_to_str()}' def __repr__(self) -> str: return f"PeptideFragmentAnnotation(ion_type='{self.ion_type}', " \ f"ion_index={self.ion_index}, charge={self.charge}, " \ f"mz={self.calc_mz})" def __eq__(self, other: Any) -> bool: if not isinstance(other, PeptideFragmentAnnotation): return False else: return (self.ion_type == other.ion_type and self.ion_index == other.ion_index and self.charge == other.charge and math.isclose(self.calc_mz, other.calc_mz)) class MoleculeFragmentAnnotation(FragmentAnnotation): """ Class representing a molecule fragment ion annotation. """ def __init__(self, charge: int, calc_mz: float, smiles: str) -> None: """ Instantiate a new `MoleculeFragmentAnnotation`. Parameters ---------- charge : int The molecule fragment ion charge. calc_mz : float The theoretical m/z value of the molecule fragment. smiles : str The SMILES representation of the molecule. """ super().__init__(charge, calc_mz) self.ion_type = 'molecule' self.smiles = smiles self.annotation = self.smiles def __repr__(self) -> str: return f"MoleculeFragmentAnnotation(smiles='{self.smiles}', " \ f"charge={self.charge}, mz={self.calc_mz})" def __eq__(self, other: Any) -> bool: if not isinstance(other, MoleculeFragmentAnnotation): return False else: return (self.smiles == other.smiles and self.charge == other.charge and math.isclose(self.calc_mz, other.calc_mz)) def _get_theoretical_peptide_fragments( peptide: str, modifications: Optional[Dict[Union[float, str], float]] = None, types: str = 'by', max_charge: int = 1)\ -> List[PeptideFragmentAnnotation]: """ Get theoretical peptide fragments for the given peptide. Parameters ---------- peptide : str The peptide sequence for which the fragments will be generated. The peptide sequence should only exist of the 20 standard amino acids. modifications : Optional[Dict[Union[float, str], float]], optional Mapping of modification positions and mass differences. Valid positions are any amino acid index in the peptide (0-based), 'N-term', and 'C-term'. types : str, optional The fragment type. Can be any combination of 'a', 'b', 'c', 'x', 'y', and 'z' (the default is 'by', which means that b-ions and y-ions will be generated). max_charge : int, optional All fragments up to and including the given charge will be generated (the default is 1 to only generate singly-charged fragments). Returns ------- List[Tuple[PeptideFragmentAnnotation, float]] A list of all fragments as (`PeptideFragmentAnnotation`, m/z) tuples sorted in ascending m/z order. """ if modifications is not None: mods = modifications.copy() if 'N-term' in modifications: mods[-1] = mods['N-term'] del mods['N-term'] if 'C-term' in modifications: mods[len(peptide) + 1] = mods['C-term'] del mods['C-term'] else: mods = {} ions = [] for i in range(1, len(peptide)): for ion_type in types: if ion_type in 'abc': # N-terminal fragment. ion_index = i sequence = peptide[:i] mod_mass = sum([md for pos, md in mods.items() if pos < i]) else: # C-terminal fragment. ion_index = len(peptide) - i sequence = peptide[i:] mod_mass = sum([md for pos, md in mods.items() if pos >= i]) for charge in range(1, max_charge + 1): ions.append(PeptideFragmentAnnotation( charge, mass.fast_mass( sequence=sequence, ion_type=ion_type, charge=charge, aa_mass=_aa_mass) + mod_mass / charge, ion_type, ion_index)) return sorted(ions, key=operator.attrgetter('calc_mz')) @nb.njit(nb.types.Tuple((nb.float32[:], nb.float32[:], nb.int64[:])) (nb.float32[::1], nb.float32[::1])) def _init_spectrum(mz: np.ndarray, intensity: np.ndarray)\ -> Tuple[np.ndarray, np.ndarray, np.ndarray]: mz, intensity = mz.reshape(-1), intensity.reshape(-1) order = np.argsort(mz) return mz[order], intensity[order], order @nb.njit def _round(mz: np.ndarray, intensity: np.ndarray, decimals: int, combine: str)\ -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ JIT helper function for `MsmsSpectrum.round`. Parameters ---------- mz : np.ndarray The mass-to-charge ratios of the spectrum fragment peaks. intensity : np.ndarray The intensities of the corresponding spectrum fragment peaks. decimals : int Number of decimal places to round the mass-to-charge ratios. combine : {'sum', 'max'} Method used to combine intensities from merged fragment peaks. Returns ------- Tuple[np.ndarray, np.ndarray, np.ndarray] A tuple consisting of the rounded mass-to-charge ratios and the corresponding intensities and peak annotation indexes. """ mz_round = np.round_(mz, decimals, np.empty_like(mz, np.float32)) mz_unique = np.unique(mz_round) # If peaks got merged by rounding the mass-to-charge ratios we need to # combine their intensities and annotations as well. if len(mz_unique) < len(mz_round): intensity_unique = np.zeros_like(mz_unique, np.float32) annotations_unique_idx = np.zeros_like(mz_unique, np.int64) combine_is_sum = combine == 'sum' i_orig = 0 offset = 0 for i_unique in range(len(mz_unique)): # Check whether subsequent mz values got merged. while (abs(mz_unique[i_unique] - mz_round[i_orig + offset]) <= 1e-06): offset += 1 # Select the annotation of the most intense peak. annotations_unique_idx[i_unique] = i_orig + np.argmax( intensity[i_orig: i_orig + offset]) # Combine the corresponding intensities. intensity_unique[i_unique] = ( intensity[i_orig: i_orig + offset].sum() if combine_is_sum else intensity[annotations_unique_idx[i_unique]]) i_orig += offset offset = 0 return mz_unique, intensity_unique, annotations_unique_idx else: return mz_unique, intensity, np.arange(len(mz)) @nb.njit def _get_mz_range_mask(mz: np.ndarray, min_mz: float, max_mz: float)\ -> np.ndarray: """ JIT helper function for `MsmsSpectrum.set_mz_range`. Parameters ---------- mz : np.ndarray The mass-to-charge ratios of the spectrum fragment peaks. min_mz : float Minimum m/z (inclusive). max_mz : float Maximum m/z (inclusive). Returns ------- np.ndarray Index mask specifying which peaks are inside of the given m/z range. """ mask = np.full_like(mz, True, np.bool_) mz_i = 0 while mz_i < len(mz) and mz[mz_i] < min_mz: mask[mz_i] = False mz_i += 1 mz_i = len(mz) - 1 while mz_i > 0 and mz[mz_i] > max_mz: mask[mz_i] = False mz_i -= 1 return mask @nb.njit def _get_non_precursor_peak_mask(mz: np.ndarray, pep_mass: float, max_charge: int, isotope: int, fragment_tol_mass: float, fragment_tol_mode: str)\ -> np.ndarray: """ JIT helper function for `MsmsSpectrum.remove_precursor_peak`. Parameters ---------- mz : np.ndarray The mass-to-charge ratios of the spectrum fragment peaks. pep_mass : float The mono-isotopic mass of the uncharged peptide. max_charge : int The maximum precursor loss charge. isotope : int The number of isotopic peaks to be checked. fragment_tol_mass : float Fragment mass tolerance around the precursor mass to remove the precursor peak. fragment_tol_mode : {'Da', 'ppm'} Fragment mass tolerance unit. Either 'Da' or 'ppm'. Returns ------- np.ndarray Index mask specifying which peaks are retained after precursor peak filtering. """ remove_mz = [] for charge in range(max_charge, 0, -1): for iso in range(isotope + 1): remove_mz.append((pep_mass + iso) / charge + 1.0072766) fragment_tol_mode_is_da = fragment_tol_mode == 'Da' mask = np.full_like(mz, True, np.bool_) mz_i = remove_i = 0 while mz_i < len(mz) and remove_i < len(remove_mz): md = utils.mass_diff(mz[mz_i], remove_mz[remove_i], fragment_tol_mode_is_da) if md < -fragment_tol_mass: mz_i += 1 elif md > fragment_tol_mass: remove_i += 1 else: mask[mz_i] = False mz_i += 1 return mask @nb.njit def _get_filter_intensity_mask(intensity: np.ndarray, min_intensity: float, max_num_peaks: int) -> np.ndarray: """ JIT helper function for `MsmsSpectrum.filter_intensity`. Parameters ---------- intensity : np.ndarray The intensities of the spectrum fragment peaks. min_intensity : float Remove peaks whose intensity is below `min_intensity` percentage of the intensity of the most intense peak. max_num_peaks : int Only retain the `max_num_peaks` most intense peaks. Returns ------- np.ndarray Index mask specifying which peaks are retained after filtering the at most `max_num_peaks` most intense intensities above the minimum intensity threshold. """ intensity_idx = np.argsort(intensity) min_intensity *= intensity[intensity_idx[-1]] # Discard low-intensity noise peaks. start_i = 0 for start_i, intens in enumerate(intensity[intensity_idx]): if intens > min_intensity: break # Only retain at most the `max_num_peaks` most intense peaks. mask = np.full_like(intensity, False, np.bool_) mask[intensity_idx[max(start_i, len(intensity_idx) - max_num_peaks):]] =\ True return mask @nb.njit(nb.float32[:](nb.float32[:], nb.int64)) def _get_scaled_intensity_root(intensity: np.ndarray, degree: int)\ -> np.ndarray: """ JIT helper function for `MsmsSpectrum.scale_intensity`. Parameters ---------- intensity : np.ndarray The intensities of the spectrum fragment peaks. degree : int The degree of the root scaling. Returns ------- np.ndarray The root-scaled intensities. """ return np.power(intensity, 1 / degree).astype(np.float32) @nb.njit(nb.float32[:](nb.float32[:], nb.float64)) def _get_scaled_intensity_log(intensity: np.ndarray, base: int) -> np.ndarray: """ JIT helper function for `MsmsSpectrum.scale_intensity`. Parameters ---------- intensity : np.ndarray The intensities of the spectrum fragment peaks. base : int The base of the log scaling. Returns ------- np.ndarray The log-scaled intensities. """ return (np.log1p(intensity) / np.log(base)).astype(np.float32) @nb.njit(nb.float32[:](nb.float32[:], nb.int64)) def _get_scaled_intensity_rank(intensity: np.ndarray, max_rank: int)\ -> np.ndarray: """ JIT helper function for `MsmsSpectrum.scale_intensity`. Parameters ---------- intensity : np.ndarray The intensities of the spectrum fragment peaks. max_rank : int The maximum rank of the rank scaling. Returns ------- np.ndarray The rank-scaled intensities. """ return ((max_rank - np.argsort(np.argsort(intensity)[::-1])) .astype(np.float32)) @nb.njit(nb.float32[:](nb.float32[:], nb.float32), fastmath=True) def _scale_intensity_max(intensity: np.ndarray, max_intensity: float)\ -> np.ndarray: """ JIT helper function for `MsmsSpectrum.scale_intensity`. Parameters ---------- intensity : np.ndarray The intensities of the spectrum fragment peaks. max_intensity : float Intensity of the most intense peak relative to which the peaks will be scaled. Returns ------- np.ndarray The maximum-scaled intensities. """ return intensity * max_intensity / intensity.max() @nb.njit def _get_peptide_fragment_annotation_map( spectrum_mz: np.ndarray, spectrum_intensity: np.ndarray, annotation_mz: List[float], fragment_tol_mass: float, fragment_tol_mode: str, peak_assignment: str = 'most_intense')\ -> List[Tuple[int, int]]: """ JIT helper function for `MsmsSpectrum.annotate_peaks`. Parameters ---------- spectrum_mz : np.ndarray The mass-to-charge varlues of the spectrum fragment peaks. spectrum_intensity : np.ndarray The intensities of the spectrum fragment peaks. annotation_mz : List[float] A list of mass-to-charge values of the peptide fragment annotations. fragment_tol_mass : float Fragment mass tolerance to match spectrum peaks against theoretical peaks. fragment_tol_mode : {'Da', 'ppm'} Fragment mass tolerance unit. Either 'Da' or 'ppm'. peak_assignment : {'most_intense', 'nearest_mz'}, optional In case multiple peaks occur within the given mass window around a theoretical peak, only a single peak will be annotated with the fragment type: - 'most_intense': The most intense peak will be annotated (default). - 'nearest_mz': The peak whose m/z is closest to the theoretical m/z will be annotated. Returns ------- A list of (peak index, annotation index) tuples. """ annotation_i_map = [] peak_i_start = 0 for fragment_i, fragment_mz in enumerate(annotation_mz): while (peak_i_start < len(spectrum_mz) and utils.mass_diff(spectrum_mz[peak_i_start], fragment_mz, fragment_tol_mode == 'Da') < -fragment_tol_mass): peak_i_start += 1 peak_i_stop = peak_i_start while (peak_i_stop < len(spectrum_mz) and utils.mass_diff(spectrum_mz[peak_i_stop], fragment_mz, fragment_tol_mode == 'Da') <= fragment_tol_mass): peak_i_stop += 1 if peak_i_start != peak_i_stop: peak_annotation_i = 0 if peak_assignment == 'nearest_mz': peak_annotation_i = np.argmin(np.abs( spectrum_mz[peak_i_start: peak_i_stop] - fragment_mz)) elif peak_assignment == 'most_intense': peak_annotation_i = np.argmax( spectrum_intensity[peak_i_start: peak_i_stop]) annotation_i_map.append((peak_i_start + peak_annotation_i, fragment_i)) return annotation_i_map @nb.njit def _get_mz_peak_index( spectrum_mz: np.ndarray, spectrum_intensity: np.ndarray, fragment_mz: float, fragment_tol_mass: float, fragment_tol_mode: str, peak_assignment: str = 'most_intense')\ -> Optional[int]: """ Find the best m/z peak in a spectrum relative to the given m/z value. Parameters ---------- spectrum_mz : np.ndarray The mass-to-charge varlues of the spectrum fragment peaks. spectrum_intensity : np.ndarray The intensities of the spectrum fragment peaks. fragment_mz : float The m/z of the requested peak. fragment_tol_mass : float Fragment mass tolerance to match spectrum peaks against theoretical peaks. fragment_tol_mode : {'Da', 'ppm'} Fragment mass tolerance unit. Either 'Da' or 'ppm'. peak_assignment : {'most_intense', 'nearest_mz'}, optional In case multiple peaks occur within the given mass window around a theoretical peak, only a single peak will be annotated with the fragment type: - 'most_intense': The most intense peak will be annotated (default). - 'nearest_mz': The peak whose m/z is closest to the theoretical m/z will be annotated. Returns ------- int None if no peak was found within the given m/z window, else the index of the best m/z peak relative to the given m/z value. """ # Binary search to find the best matching peak. md = (fragment_tol_mass if fragment_tol_mode == 'Da' else fragment_tol_mass / 10**6 * fragment_mz) peak_i_start, peak_i_stop = np.searchsorted( spectrum_mz, [fragment_mz - md, fragment_mz + md]) if peak_i_start == peak_i_stop: return None else: peak_annotation_i = 0 if peak_assignment == 'nearest_mz': peak_annotation_i = np.argmin(np.abs( spectrum_mz[peak_i_start:peak_i_stop] - fragment_mz)) elif peak_assignment == 'most_intense': peak_annotation_i = np.argmax( spectrum_intensity[peak_i_start:peak_i_stop]) return peak_i_start + peak_annotation_i class MsmsSpectrum: """ Class representing a tandem mass spectrum. """ def __init__(self, identifier: str, precursor_mz: float, precursor_charge: int, mz: Union[np.ndarray, Iterable], intensity: Union[np.ndarray, Iterable], annotation: Optional[Union[np.ndarray, Iterable]] = None, retention_time: Optional[float] = None, peptide: Optional[str] = None, modifications: Optional[Dict[Union[int, str], float]] = None, is_decoy: bool = False) -> None: """ Instantiate a new `MsmsSpectrum` consisting of fragment peaks. Parameters ---------- identifier : str (Unique) spectrum identifier. See for example the universal spectrum identifier specification by the Proteomics Standards Initiative. precursor_mz : float Precursor ion mass-to-charge ratio. precursor_charge : int Precursor ion charge. mz : array_like Mass-to-charge ratios of the fragment peaks. intensity : array_like Intensities of the corresponding fragment peaks in `mz`. annotation : Optional[array_like], optional Annotations of the corresponding fragment peaks in `mz` (the default is None, which indicates that the fragment peaks are not annotated). retention_time : Optional[float], optional Retention time at which the spectrum was acquired (the default is None, which indicates that retention time is unspecified/unknown). peptide : Optional[str], optional The peptide sequence corresponding to the spectrum (the default is None, which means that no peptide-spectrum match is specified). The peptide sequence should only exist of the 20 standard amino acids. modifications : Optional[Dict[Union[int, str], float]], optional Mapping of modification positions and mass differences. Valid positions are any amino acid index in the peptide (0-based), 'N-term', and 'C-term'. is_decoy : bool, optional Flag indicating whether the `peptide` is a target or decoy peptide (the default is False, which implies a target peptide). """ self.identifier = identifier self.precursor_mz = precursor_mz self.precursor_charge = precursor_charge if len(mz) != len(intensity): raise ValueError('The mass-to-charge and intensity arrays should ' 'have equal length') self._mz, self._intensity, order = _init_spectrum( np.require(mz, np.float32, 'W'), np.require(intensity, np.float32, 'W')) if annotation is not None: self._annotation = np.asarray(annotation).reshape(-1) if len(self.mz) != len(self.annotation): raise ValueError('The mass-to-charge and annotation arrays ' 'should have equal length') else: self._annotation = self.annotation[order] else: self._annotation = None self.retention_time = retention_time if peptide is not None: self.peptide = peptide.upper() for aa in self.peptide: if aa not in mass.std_aa_mass: raise ValueError(f'Unknown amino acid: {aa}') else: self.peptide = None if peptide is not None and modifications is not None: for mod_pos in modifications.keys(): if mod_pos not in ('N-term', 'C-term'): if not isinstance(mod_pos, int): raise ValueError(f'Unknown modification position: ' f'{mod_pos}') elif mod_pos < 0 or mod_pos > len(peptide): raise ValueError(f'Modification position exceeds ' f'peptide bounds: {mod_pos}') else: modifications = None self.modifications = modifications self.is_decoy = is_decoy @property def mz(self) -> np.ndarray: """ Get or set the mass-to-charge ratios of the fragment peaks. When setting new m/z values it should be possible to convert the given values to a NumPy array and the number of m/z values should be equal to the number of intensity (and annotation) values. Returns ------- np.ndarray The mass-to-charge ratios of the fragment peaks. """ return self._mz @mz.setter def mz(self, mz: Union[np.ndarray, Iterable]) -> None: if np.asarray(mz).ndim > 0: self._mz = np.asarray(mz) else: raise ValueError('Invalid m/z values') @property def intensity(self) -> np.ndarray: """ Get or set the intensities of the fragment peaks. When setting new intensity values it should be possible to convert the given values to a NumPy array and the number of intensity values should be equal to the number of m/z (and annotation) values. Returns ------- np.ndarray The intensity values of the fragment peaks. """ return self._intensity @intensity.setter def intensity(self, intensity: Union[np.ndarray, Iterable]) -> None: if np.asarray(intensity).ndim > 0: self._intensity = np.asarray(intensity) else: raise ValueError('Invalid intensity values') @property def annotation(self) -> Optional[np.ndarray]: """ Get or set the annotations of the fragment peaks. When setting new annotations it should be possible to convert the given values to a NumPy array (or None) and the number of annotations should be equal to the number of m/z and intensity values. Returns ------- Optional[np.ndarray] The annotations of the fragment peaks or None if no annotations have been specified. """ return self._annotation @annotation.setter def annotation( self, annotation: Optional[Union[np.ndarray, Iterable]] = None)\ -> None: if annotation is None: self._annotation = None elif np.asarray(annotation).ndim > 0: self._annotation = np.asarray(annotation) else: raise ValueError('Invalid annotation values') def round(self, decimals: int = 0, combine: str = 'sum') -> 'MsmsSpectrum': """ Round the mass-to-charge ratios of the fragment peaks to the given number of decimals. Peaks that have the same mass-to-charge ratio after rounding will be combined using the specified strategy. If multiple peaks are merged into a single peak it will be annotated with the annotation of the most intense peak. Parameters ---------- decimals : int, optional Number of decimal places to round the `mz` to (default: 0). If decimals is negative, it specifies the number of positions to the left of the decimal point. combine : {'sum', 'max'} Method used to combine intensities from merged fragment peaks. ``sum`` specifies that the intensities of the merged fragment peaks will be summed, ``max`` specifies that the maximum intensity of the fragment peaks that are merged is used (the default is ``sum``). Returns ------- self : `MsmsSpectrum` """ self.mz, self.intensity, annotation_idx = _round( self.mz, self.intensity, decimals, combine) if self.annotation is not None: self.annotation = self.annotation[annotation_idx] return self def set_mz_range(self, min_mz: Optional[float] = None, max_mz: Optional[float] = None) -> 'MsmsSpectrum': """ Restrict the mass-to-charge ratios of the fragment peaks to the given range. Parameters ---------- min_mz : Optional[float], optional Minimum m/z (inclusive). If not set no minimal m/z restriction will occur. max_mz : Optional[float], optional Maximum m/z (inclusive). If not set no maximal m/z restriction will occur. Returns ------- self : `MsmsSpectrum` """ if min_mz is None and max_mz is None: return self else: if min_mz is None: min_mz = self.mz[0] if max_mz is None: max_mz = self.mz[-1] mz_range_mask = _get_mz_range_mask(self.mz, min_mz, max_mz) self.mz = self.mz[mz_range_mask] self.intensity = self.intensity[mz_range_mask] if self.annotation is not None: self.annotation = self.annotation[mz_range_mask] return self def remove_precursor_peak(self, fragment_tol_mass: float, fragment_tol_mode: str, isotope: int = 0)\ -> 'MsmsSpectrum': """ Remove fragment peak(s) close to the precursor mass-to-charge ratio. Parameters ---------- fragment_tol_mass : float Fragment mass tolerance around the precursor mass to remove the precursor peak. fragment_tol_mode : {'Da', 'ppm'} Fragment mass tolerance unit. Either 'Da' or 'ppm'. isotope : int The number of precursor isotopic peaks to be checked (the default is 0 to check only the mono-isotopic peaks). Returns ------- self : `MsmsSpectrum` """ pep_mass = (self.precursor_mz - 1.0072766) * self.precursor_charge peak_mask = _get_non_precursor_peak_mask( self.mz, pep_mass, self.precursor_charge, isotope, fragment_tol_mass, fragment_tol_mode) self.mz = self.mz[peak_mask] self.intensity = self.intensity[peak_mask] if self.annotation is not None: self.annotation = self.annotation[peak_mask] return self def filter_intensity(self, min_intensity: float = 0.0, max_num_peaks: Optional[int] = None)\ -> 'MsmsSpectrum': """ Remove low-intensity fragment peaks. Only the `max_num_peaks` most intense fragment peaks are retained. Additionally, noise peaks whose intensity is below `min_intensity` percentage of the intensity of the most intense peak are removed. Parameters ---------- min_intensity : float, optional Remove peaks whose intensity is below `min_intensity` percentage of the intensity of the most intense peak (the default is 0, which means that no minimum intensity filter will be applied). max_num_peaks : Optional[int], optional Only retain the `max_num_peaks` most intense peaks (the default is None, which retains all peaks). Returns ------- self : `MsmsSpectrum` """ if max_num_peaks is None: max_num_peaks = len(self.intensity) intensity_mask = _get_filter_intensity_mask( self.intensity, min_intensity, max_num_peaks) self.mz = self.mz[intensity_mask] self.intensity = self.intensity[intensity_mask] if self.annotation is not None: self.annotation = self.annotation[intensity_mask] return self def scale_intensity(self, scaling: Optional[str] = None, max_intensity: Optional[float] = None, **kwargs) -> 'MsmsSpectrum': """ Scale the intensity of the fragment peaks. Two types of scaling can be performed: scaling all peaks using a specific transformation and scaling the peaks relative to the most intense peak. Parameters ---------- scaling : {'root', 'log', 'rank'}, optional Method to scale the peak intensities (the default is None, which means that no transformation will be performed). Potential transformation options are: - 'root': Root-transform the peak intensities. The default is a square root transformation (`degree` is 2). The degree of the root can be specified using the `degree` kwarg. - 'log': Log-transform the peak intensities. The default is a log2 transformation (`base` is 2) after summing the intensities with 1 to avoid negative values after the transformation. The base of the logarithm can be specified using the `base` kwarg. - 'rank': Rank-transform the peak intensities. The maximum rank of the most intense peak can be specified using the `max_rank` kwarg, by default the number of peaks in the spectrum is used as the maximum rank. Note that `max_rank` should be greater than or equal to the number of peaks in the spectrum. max_intensity : Optional[float], optional Intensity of the most intense peak relative to which the peaks will be scaled (the default is None, which means that no scaling relative to the most intense peak will be performed). Returns ------- self : `MsmsSpectrum` """ if scaling == 'root': self.intensity = _get_scaled_intensity_root( self.intensity, kwargs.get('degree', 2)) elif scaling == 'log': self.intensity = _get_scaled_intensity_log( self.intensity, kwargs.get('base', 2)) elif scaling == 'rank': max_rank = kwargs.get('max_rank', len(self.intensity)) if max_rank < len(self.intensity): raise ValueError('`max_rank` should be greater than or equal ' 'to the number of peaks in the spectrum. See ' '`filter_intensity` to reduce the number of ' 'peaks in the spectrum.') self.intensity = _get_scaled_intensity_rank( self.intensity, max_rank) if max_intensity is not None: self.intensity = _scale_intensity_max( self.intensity, max_intensity) return self def annotate_peaks(self, *args, **kwargs): raise DeprecationWarning('Renamed to annotate_peptide_fragments') def annotate_peptide_fragments(self, fragment_tol_mass: float, fragment_tol_mode: str, ion_types: str = 'by', max_ion_charge: Optional[int] = None, peak_assignment: str = 'most_intense')\ -> 'MsmsSpectrum': """ Annotate peaks with their corresponding peptide fragment ion annotations. `self.annotation` will be overwritten and include `PeptideFragmentAnnotation` objects for matching peaks. Parameters ---------- fragment_tol_mass : float Fragment mass tolerance to match spectrum peaks against theoretical peaks. fragment_tol_mode : {'Da', 'ppm'} Fragment mass tolerance unit. Either 'Da' or 'ppm'. ion_types : str, optional Fragment type to annotate. Can be any combination of 'a', 'b', 'c', 'x', 'y', and 'z' (the default is 'by', which means that b-ions and y-ions will be annotated). max_ion_charge : Optional[int], optional All fragments up to and including the given charge will be annotated (by default all fragments with a charge up to the precursor minus, or minimum charge one, one will be annotated). peak_assignment : {'most_intense', 'nearest_mz'}, optional In case multiple peaks occur within the given mass window around a theoretical peak, only a single peak will be annotated with the fragment type: - 'most_intense': The most intense peak will be annotated (default). - 'nearest_mz': The peak whose m/z is closest to the theoretical m/z will be annotated. Returns ------- self : `MsmsSpectrum` """ if self.peptide is None: raise ValueError('No peptide sequence available for the spectrum') if max_ion_charge is None: max_ion_charge = max(1, self.precursor_charge - 1) theoretical_fragments = _get_theoretical_peptide_fragments( self.peptide, self.modifications, ion_types, max_ion_charge) self.annotation = np.full_like(self.mz, None, object) with warnings.catch_warnings(): # FIXME: Deprecated reflected list in Numba should be resolved from # version 0.46.0 onwards. # https://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-reflection-for-list-and-set-types warnings.simplefilter('ignore', nb.NumbaPendingDeprecationWarning) for annotation_i, fragment_i in\ _get_peptide_fragment_annotation_map( self.mz, self.intensity, [fragment.calc_mz for fragment in theoretical_fragments], fragment_tol_mass, fragment_tol_mode, peak_assignment): self.annotation[annotation_i] =\ theoretical_fragments[fragment_i] return self def annotate_molecule_fragment(self, smiles: str, fragment_mz: float, fragment_charge: int, fragment_tol_mass: float, fragment_tol_mode: str, peak_assignment: str = 'most_intense')\ -> 'MsmsSpectrum': """ Annotate a peak (if present) with its corresponding molecule. The matching position in `self.annotation` will be overwritten by the provided molecule. Parameters ---------- smiles : str The fragment molecule that will be annotated in SMILES format. fragment_mz : float The expected m/z of the molecule. fragment_charge : int The charge of the molecule. fragment_tol_mass : float Fragment mass tolerance to match spectrum peaks against the theoretical molecule mass. fragment_tol_mode : {'Da', 'ppm'} Fragment mass tolerance unit. Either 'Da' or 'ppm'. peak_assignment : {'most_intense', 'nearest_mz'}, optional In case multiple peaks occur within the given mass window around the molecule's given mass, only a single peak will be annotated: - 'most_intense': The most intense peak will be annotated (default). - 'nearest_mz': The peak whose m/z is closest to the theoretical m/z will be annotated. Returns ------- self : `MsmsSpectrum` """ # Make sure the SMILES annotation is valid and sanitized. mol = Chem.MolFromSmiles(smiles, sanitize=False) if mol is None or Chem.SanitizeMol(mol, catchErrors=True) != 0: raise ValueError('Invalid SMILES molecule') # Find the matching m/z value. peak_index = _get_mz_peak_index(self.mz, self.intensity, fragment_mz, fragment_tol_mass, fragment_tol_mode, peak_assignment) if peak_index is None: raise ValueError(f'No matching peak found for molecule "{smiles}"') else: # Initialize the annotations if they don't exist yet. if self.annotation is None: self.annotation = np.full_like(self.mz, None, object) # Set the molecule's annotation. self.annotation[peak_index] =\ MoleculeFragmentAnnotation(fragment_charge, fragment_mz, Chem.MolToSmiles(mol)) return self def annotate_mz_fragment(self, fragment_mz: float, fragment_charge: int, fragment_tol_mass: float, fragment_tol_mode: str, peak_assignment: str = 'most_intense', text: Optional[str] = None) -> 'MsmsSpectrum': """ Annotate a peak (if present) with its m/z value or a custom provided string. The matching position in `self.annotation` will be overwritten. Parameters ---------- fragment_mz : float The expected m/z to annotate. fragment_charge : int The peak charge. fragment_tol_mass : float Fragment mass tolerance to match spectrum peaks against the given m/z. fragment_tol_mode : {'Da', 'ppm'} Fragment mass tolerance unit. Either 'Da' or 'ppm'. peak_assignment : {'most_intense', 'nearest_mz'}, optional In case multiple peaks occur within the given mass window around the given m/z, only a single peak will be annotated: - 'most_intense': The most intense peak will be annotated (default). - 'nearest_mz': The peak whose m/z is closest to the given m/z will be annotated. text : Optional[str], optional The text to annotate the peak with. If None, its m/z value will be used. Returns ------- self : `MsmsSpectrum` """ # Find the matching m/z value. peak_index = _get_mz_peak_index(self.mz, self.intensity, fragment_mz, fragment_tol_mass, fragment_tol_mode, peak_assignment) if peak_index is None: raise ValueError(f'No matching peak found for {fragment_mz} m/z') else: # Initialize the annotations if they don't exist yet. if self.annotation is None: self.annotation = np.full_like(self.mz, None, object) # Set the peak annotation. self.annotation[peak_index] =\ FragmentAnnotation( fragment_charge, fragment_mz, text if text is not None else str(fragment_mz)) return self