Skip to content

Commit 1e3d86c

Browse files
zhuoxinshitrishortsnbollisRayMSMSAlexander-Sol
authored
MassIndexingEngine (smith-chem-wisc#886)
* Created IndexedMass, MassIndexingEngine, and mass indexing method in the engine * Added GetXIC method in MassIndexingEngine and tests * added some tests * removed unnecessary override methods; Added optional charge in generic peak finding method so it can work with all kinds of indexed peaks * ppmtol * bug * error * converted all doubles into float in IndexedMass * added comments and exception check * . * . * comments * moved exception handling to GetPeakFromBin * . * merge * added override GetAllXics in massIndexingEngine * added more tests * . * moved exception check * changed RT to zerobasedScanIndex in getallXIcs * check if peak is indexedmass in GetAllXics --------- Co-authored-by: trishorts <[email protected]> Co-authored-by: Nic Bollis <[email protected]> Co-authored-by: Sugar Ray <[email protected]> Co-authored-by: Alexander-Sol <[email protected]>
1 parent 89bb5c5 commit 1e3d86c

File tree

8 files changed

+226
-48
lines changed

8 files changed

+226
-48
lines changed

mzLib/FlashLFQ/PeakIndexingEngine/IFlashLfqIndexingEngine.cs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ public interface IFlashLfqIndexingEngine
2121
/// </summary>
2222
/// <param name="mz"> the m/z of the peak to be searched for </param>
2323
/// <param name="zeroBasedScanIndex"> the zero based index of the scan where the peak is to be found </param>
24-
public IIndexedPeak GetIndexedPeak(double mz, int zeroBasedScanIndex, Tolerance tolerance);
24+
/// <param name="charge"> an optional charge state used only when the indexed peaks are IndexedMass objects (i.e. with a MassIndexingEngine); must be null for m/z peak indexing </param>
25+
public IIndexedPeak GetIndexedPeak(double mz, int zeroBasedScanIndex, Tolerance tolerance, int? charge = null);
2526
/// <summary>
2627
/// A generic method of peak tracing across the retention time. Finds peaks with a given mz that occur on either side of a given
2728
/// retention time. Peak searching iterates backwards through the scans until the peak
@@ -34,8 +35,9 @@ public interface IFlashLfqIndexingEngine
3435
/// <param name="missedScansAllowed"> the number of successive missed scans allowed before the xic is terminated </param>
3536
/// <param name="maxPeakHalfWidth"> the maximum distance from the apex RT of the XIC to both start RT and end RT </param>
3637
/// <param name="matchedPeaks"> the dictionary that stores all the peaks already matched to an xic </param>
38+
/// <param name="charge"> an optional charge state used only when the indexed peaks are IndexedMass objects (i.e. with a MassIndexingEngine); must be null for m/z peak indexing </param>
3739
/// <returns> A list of IIndexedPeak objects, ordered by retention time </returns>
38-
public List<IIndexedPeak> GetXic(double mz, double retentionTime, Tolerance ppmTolerance, int missedScansAllowed, double maxPeakHalfWidth = int.MaxValue, Dictionary<IIndexedPeak, ExtractedIonChromatogram> matchedPeaks = null);
40+
public List<IIndexedPeak> GetXic(double mz, double retentionTime, Tolerance ppmTolerance, int missedScansAllowed, double maxPeakHalfWidth = int.MaxValue, int? charge = null, Dictionary<IIndexedPeak, ExtractedIonChromatogram> matchedPeaks = null);
3941
/// <summary>
4042
/// Clear the indexed peaks and the jagged array of indexed peaks to free up memory
4143
/// </summary>

mzLib/MassSpectrometry/PeakIndexing/ExtractedIonChromatogram.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace MassSpectrometry
1515
/// </summary>
1616
public class ExtractedIonChromatogram
1717
{
18-
public List<IIndexedPeak> Peaks { get; set; }
18+
public virtual List<IIndexedPeak> Peaks { get; set; }
1919

2020
public double ApexRT;
2121
public int ApexScanIndex;
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Threading.Tasks;
6+
7+
namespace MassSpectrometry
8+
{
9+
/// <summary>
/// An indexed peak built from a deconvoluted isotopic envelope rather than from a single m/z peak.
/// The value stored in <see cref="M"/> is the envelope's monoisotopic mass, and the envelope's charge
/// is kept alongside it so that peak lookups can optionally be restricted to one charge state.
/// </summary>
public class IndexedMass : IIndexedPeak
{
    /// <summary> The highest intensity among the peaks of the isotopic envelope </summary>
    public float Intensity { get; set; }

    /// <summary> The retention time of the scan the envelope was found in (stored as float) </summary>
    public float RetentionTime { get; set; }

    /// <summary> The zero based index of the scan the envelope was found in </summary>
    public int ZeroBasedScanIndex { get; set; }

    /// <summary> The monoisotopic mass of the envelope (stored as float) </summary>
    public float M { get; set; }

    /// <summary> The charge of the isotopic envelope </summary>
    public int Charge { get; set; }

    /// <summary> The MS level value supplied by the caller for the originating scan </summary>
    public int MsLevel { get; set; }

    /// <summary> The isotopic envelope this indexed mass was created from </summary>
    public IsotopicEnvelope IsotopicEnvelope { get; set; }

    /// <summary>
    /// Creates an indexed mass from an isotopic envelope and the scan it was observed in.
    /// </summary>
    /// <param name="envelope"> the isotopic envelope; supplies the intensity, monoisotopic mass and charge </param>
    /// <param name="retentionTime"> the retention time of the scan; narrowed to float for storage </param>
    /// <param name="zeroBasedScanIndex"> the zero based index of the scan </param>
    /// <param name="msLevel"> the MS level of the scan </param>
    /// <exception cref="ArgumentNullException"> thrown when <paramref name="envelope"/> is null </exception>
    public IndexedMass(IsotopicEnvelope envelope, double retentionTime, int zeroBasedScanIndex, int msLevel)
    {
        // Fail with a descriptive exception instead of a NullReferenceException on the first dereference below.
        if (envelope == null) throw new ArgumentNullException(nameof(envelope));

        IsotopicEnvelope = envelope;
        Intensity = envelope.Peaks.Max(p => (float)p.intensity);
        RetentionTime = (float)retentionTime;
        ZeroBasedScanIndex = zeroBasedScanIndex;
        M = (float)envelope.MonoisotopicMass;
        Charge = envelope.Charge;
        MsLevel = msLevel;
    }
}
30+
}

mzLib/MassSpectrometry/PeakIndexing/IndexingEngine.cs

Lines changed: 38 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
using Chemistry;
2-
using MzLibUtil;
1+
using MzLibUtil;
32
using System;
43
using System.Collections.Generic;
54
using System.Linq;
@@ -19,7 +18,7 @@ public abstract class IndexingEngine<T> where T : IIndexedPeak
1918
/// </summary>
2019
protected List<T>[]? IndexedPeaks;
2120
protected virtual int BinsPerDalton => 100;
22-
public ScanInfo[]? ScanInfoArray { get; private set; }
21+
public ScanInfo[]? ScanInfoArray { get; protected set; }
2322

2423
/// <summary>
2524
/// Read in all spectral peaks from scans, index the peaks and store them in a list ordered by m/z
@@ -56,26 +55,19 @@ public virtual bool IndexPeaks(MsDataScan[] scanArray)
5655
return true;
5756
}
5857

59-
/// <summary>
60-
/// A generic method for finding the closest peak with a specified mass and charge state and in a specified scan. Returns null if no peaks within tolerance are found.
61-
/// </summary>
62-
/// <param name="mz"> the m/z of the peak to be searched for </param>
63-
/// <param name="zeroBasedScanIndex"> the zero based index of the scan where the peak is to be found </param>
64-
public IIndexedPeak? GetIndexedPeak(double theorMass, int zeroBasedScanIndex, Tolerance ppmTolerance, int chargeState) =>
65-
GetIndexedPeak(theorMass.ToMz(chargeState), zeroBasedScanIndex, ppmTolerance);
66-
6758
/// <summary>
6859
/// A generic method for finding the closest peak with a specified m/z and in a specified scan. Returns null if no peaks within tolerance are found.
6960
/// </summary>
70-
/// <param name="mz"> the m/z of the peak to be searched for </param>
61+
/// <param name="m"> the m/z of the peak to be searched for (or its monoisotopic mass when the indexed peaks are IndexedMass objects) </param>
7162
/// <param name="zeroBasedScanIndex"> the zero based index of the scan where the peak is to be found </param>
72-
public IIndexedPeak? GetIndexedPeak(double mz, int zeroBasedScanIndex, Tolerance ppmTolerance)
63+
/// <param name="charge"> an optional charge state used only when the indexed peaks are IndexedMass objects (i.e. with a MassIndexingEngine); must be null for m/z peak indexing </param>
64+
public IIndexedPeak? GetIndexedPeak(double m, int zeroBasedScanIndex, Tolerance ppmTolerance, int? charge = null)
7365
{
7466
if (IndexedPeaks == null) throw new MzLibException("Error: Attempt to retrieve indexed peak before peak indexing was performed");
75-
var bins = GetBinsInRange(mz, ppmTolerance);
67+
var bins = GetBinsInRange(m, ppmTolerance);
7668
if (bins.Count == 0) return default(T);
7769
List<int> peakIndicesInBins = bins.Select(b => BinarySearchForIndexedPeak(b, zeroBasedScanIndex)).ToList();
78-
return GetBestPeakFromBins(bins, mz, zeroBasedScanIndex, peakIndicesInBins, ppmTolerance);
70+
return GetBestPeakFromBins(bins, m, zeroBasedScanIndex, peakIndicesInBins, ppmTolerance, charge);
7971
}
8072

8173
/// <summary>
@@ -85,17 +77,20 @@ public virtual bool IndexPeaks(MsDataScan[] scanArray)
8577
/// missedScansAllowed parameter. Missed scans don't have to be sequential. The same procedure
8678
/// is then repeated in the forward direction.
8779
/// </summary>
88-
/// <param name="mz"> the m/z of the peak to be searched for </param>
80+
/// <param name="m"> the m/z of the peak to be searched for (or its monoisotopic mass when the indexed peaks are IndexedMass objects) </param>
8981
/// <param name="retentionTime"> the retention time where peak searching will begin </param>
9082
/// <param name="missedScansAllowed"> the number of successive missed scans allowed before the xic is terminated </param>
9183
/// <param name="maxPeakHalfWidth"> the maximum distance from the apex RT of the XIC to both start RT and end RT </param>
84+
/// <param name="charge"> an optional charge state used only when the indexed peaks are IndexedMass objects (i.e. with a MassIndexingEngine); must be null for m/z peak indexing </param>
9285
/// <param name="matchedPeaks"> the dictionary that stores all the peaks already matched to an xic </param>
9386
/// <returns> A list of IIndexedPeak objects, ordered by retention time </returns>
94-
public List<IIndexedPeak> GetXic(double mz, double retentionTime, Tolerance ppmTolerance, int missedScansAllowed, double maxPeakHalfWidth = double.MaxValue, Dictionary<IIndexedPeak, ExtractedIonChromatogram> matchedPeaks = null)
87+
public List<IIndexedPeak> GetXic(double m, double retentionTime, Tolerance ppmTolerance,
88+
int missedScansAllowed, double maxPeakHalfWidth = double.MaxValue, int? charge = null, Dictionary<IIndexedPeak, ExtractedIonChromatogram> matchedPeaks = null)
9589
{
9690
// get precursor scan to start at
9791
int scanIndex = -1;
9892
if (ScanInfoArray == null) throw new MzLibException("Error: Attempt to retrieve XIC before peak indexing was performed");
93+
9994
foreach (ScanInfo scan in ScanInfoArray)
10095
{
10196
if (scan.RetentionTime < retentionTime)
@@ -108,7 +103,7 @@ public List<IIndexedPeak> GetXic(double mz, double retentionTime, Tolerance ppmT
108103
}
109104
}
110105

111-
return GetXic(mz, scanIndex, ppmTolerance, missedScansAllowed, maxPeakHalfWidth, matchedPeaks);
106+
return GetXic(m, scanIndex, ppmTolerance, missedScansAllowed, maxPeakHalfWidth, charge, matchedPeaks);
112107
}
113108

114109
/// <summary>
@@ -118,23 +113,25 @@ public List<IIndexedPeak> GetXic(double mz, double retentionTime, Tolerance ppmT
118113
/// missedScansAllowed parameter. Missed scans don't have to be sequential. The same procedure
119114
/// is then repeated in the forward direction.
120115
/// </summary>
121-
/// <param name="mz"> the m/z of the peak to be searched for </param>
116+
/// <param name="m"> the m/z of the peak to be searched for (or its monoisotopic mass when the indexed peaks are IndexedMass objects) </param>
122117
/// <param name="zeroBasedStartIndex"> the scan where peak searching begins </param>
123118
/// <param name="missedScansAllowed"> the number of successive missed scans allowed before the xic is terminated </param>
124119
/// <param name="maxPeakHalfWidth"> the maximum distance from the apex RT of the XIC to both start RT and end RT </param>
120+
/// <param name="charge"> an optional charge state used only when the indexed peaks are IndexedMass objects (i.e. with a MassIndexingEngine); must be null for m/z peak indexing </param>
125121
/// <param name="matchedPeaks"> the dictionary that stores all the peaks already matched to an xic </param>
126122
/// <returns> A list of IIndexedPeak objects, ordered by retention time </returns>
127-
public List<IIndexedPeak> GetXic(double mz, int zeroBasedStartIndex, Tolerance ppmTolerance, int missedScansAllowed, double maxPeakHalfWidth = double.MaxValue, Dictionary<IIndexedPeak, ExtractedIonChromatogram> matchedPeaks = null)
123+
public List<IIndexedPeak> GetXic(double m, int zeroBasedStartIndex, Tolerance ppmTolerance, int missedScansAllowed, double maxPeakHalfWidth = double.MaxValue, int? charge = null, Dictionary<IIndexedPeak, ExtractedIonChromatogram> matchedPeaks = null)
128124
{
129125
if (IndexedPeaks == null || ScanInfoArray == null) throw new MzLibException("Error: Attempt to retrieve XIC before peak indexing was performed");
126+
130127
List<IIndexedPeak> xic = new List<IIndexedPeak>();
131-
var allBins = GetBinsInRange(mz, ppmTolerance);
128+
var allBins = GetBinsInRange(m, ppmTolerance);
132129
if (allBins.Count == 0)
133130
return xic;
134131

135132
// For each bin, find + store a pointer to the current index
136133
int[] peakPointerArray = allBins.Select(b => BinarySearchForIndexedPeak(b, zeroBasedStartIndex)).ToArray();
137-
var initialPeak = GetBestPeakFromBins(allBins, mz, zeroBasedStartIndex, peakPointerArray, ppmTolerance);
134+
var initialPeak = GetBestPeakFromBins(allBins, m, zeroBasedStartIndex, peakPointerArray, ppmTolerance, charge);
138135

139136
if (initialPeak.IsNotDefaultOrNull())
140137
xic.Add(initialPeak);
@@ -175,7 +172,7 @@ public List<IIndexedPeak> GetXic(double mz, int zeroBasedStartIndex, Tolerance p
175172
}
176173

177174
// Search for the next peak
178-
var nextPeak = GetBestPeakFromBins(allBins, mz, currentZeroBasedScanIndex, pointerArrayCopy, ppmTolerance);
175+
var nextPeak = GetBestPeakFromBins(allBins, m, currentZeroBasedScanIndex, pointerArrayCopy, ppmTolerance, charge);
179176

180177
// Add the peak to the XIC or increment the missed peaks
181178
if (nextPeak == null || (matchedPeaks != null && matchedPeaks.ContainsKey(nextPeak)))
@@ -197,7 +194,7 @@ public List<IIndexedPeak> GetXic(double mz, int zeroBasedStartIndex, Tolerance p
197194
/// <summary>
198195
/// A generic method performing peak tracing for all the peaks in an indexingEngine and trying to find all XICs.
199196
/// <returns> A list of ExtractedIonChromatogram objects representing all XICs that can be found in an indexingEngine </returns>
200-
public List<ExtractedIonChromatogram> GetAllXics(PpmTolerance peakFindingTolerance, int maxMissedScanAllowed, double maxRTRange, int numPeakThreshold)
197+
public virtual List<ExtractedIonChromatogram> GetAllXics(Tolerance peakFindingTolerance, int maxMissedScanAllowed, double maxRTRange, int numPeakThreshold)
201198
{
202199
var xics = new List<ExtractedIonChromatogram>();
203200
var matchedPeaks = new Dictionary<IIndexedPeak, ExtractedIonChromatogram>();
@@ -206,7 +203,8 @@ public List<ExtractedIonChromatogram> GetAllXics(PpmTolerance peakFindingToleran
206203
{
207204
if (!matchedPeaks.ContainsKey(peak))
208205
{
209-
var peakList = GetXic(peak.M, peak.RetentionTime, peakFindingTolerance, maxMissedScanAllowed, maxRTRange, matchedPeaks);
206+
int? charge = peak is IndexedMass indexedMass ? indexedMass.Charge : null;
207+
var peakList = GetXic(peak.M, peak.RetentionTime, peakFindingTolerance, maxMissedScanAllowed, maxRTRange, charge, matchedPeaks);
210208
if (peakList.Count >= numPeakThreshold)
211209
{
212210
var newXIC = new ExtractedIonChromatogram(peakList);
@@ -240,12 +238,20 @@ internal List<List<T>> GetBinsInRange(double mz, Tolerance ppmTolerance)
240238
return allBins;
241239
}
242240

243-
internal static T? GetBestPeakFromBins(List<List<T>> allBins, double mz, int zeroBasedScanIndex, IList<int> peakIndicesInBins, Tolerance ppmTolerance)
241+
/// <summary>
242+
/// Returns the peak that is closest to the target mz from all possible bins
243+
/// </summary>
244+
/// <param name="charge"> an optional charge state used only when the indexed peaks are IndexedMass objects (i.e. with a MassIndexingEngine); must be null for m/z peak indexing </param>
245+
internal static T? GetBestPeakFromBins(List<List<T>> allBins, double mz, int zeroBasedScanIndex, IList<int> peakIndicesInBins, Tolerance ppmTolerance, int? charge = null)
244246
{
245247
T? bestPeak = default(T);
248+
if (charge != null && typeof(T) != typeof(IndexedMass))
249+
{
250+
throw new MzLibException("Error: Attempted to access a peak using a charge parameter, but the peaks do not have charge information available.");
251+
}
246252
for (int i = 0; i < allBins.Count; i++)
247253
{
248-
var tempPeak = GetPeakFromBin(allBins[i], mz, zeroBasedScanIndex, peakIndicesInBins[i], ppmTolerance);
254+
var tempPeak = GetPeakFromBin(allBins[i], mz, zeroBasedScanIndex, peakIndicesInBins[i], ppmTolerance, charge);
249255
if (tempPeak.IsDefaultOrNull()) continue;
250256
// Check if the peak is within the tolerance and if it is closer to the target M than the current peak
251257
if (bestPeak.IsDefaultOrNull() || Math.Abs(tempPeak.M - mz) < Math.Abs(bestPeak.M - mz))
@@ -257,9 +263,10 @@ internal List<List<T>> GetBinsInRange(double mz, Tolerance ppmTolerance)
257263
}
258264

259265
/// <summary>
260-
/// Returns the peak that is closest to the target mz
266+
/// <param name="charge"> an optional charge state used only when the indexed peaks are IndexedMass objects (i.e. with a MassIndexingEngine); must be null for m/z peak indexing </param>
267+
/// Returns the peak that is closest to the target mz from one bin
261268
/// </summary>
262-
internal static T GetPeakFromBin(List<T> bin, double mz, int zeroBasedScanIndex, int peakIndexInBin, Tolerance ppmTolerance)
269+
internal static T GetPeakFromBin(List<T> bin, double mz, int zeroBasedScanIndex, int peakIndexInBin, Tolerance ppmTolerance, int? charge = null)
263270
{
264271
T? bestPeak = default(T);
265272
if (peakIndexInBin < 0 || peakIndexInBin >= bin.Count) return bestPeak;
@@ -274,7 +281,8 @@ internal static T GetPeakFromBin(List<T> bin, double mz, int zeroBasedScanIndex,
274281

275282
if (ppmTolerance.Within(peak.M, mz)
276283
&& peak.ZeroBasedScanIndex == zeroBasedScanIndex
277-
&& (bestPeak.IsDefaultOrNull() || Math.Abs(peak.M - mz) < Math.Abs(bestPeak.M - mz)))
284+
&& (bestPeak.IsDefaultOrNull() || Math.Abs(peak.M - mz) < Math.Abs(bestPeak.M - mz))
285+
&& (charge == null || (peak is IndexedMass mass && mass.Charge == charge)))
278286
{
279287
bestPeak = peak;
280288
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
using MathNet.Numerics.RootFinding;
2+
using MzLibUtil;
3+
using System;
4+
using System.Collections.Generic;
5+
using System.Linq;
6+
using System.Runtime.CompilerServices;
7+
using System.Text;
8+
using System.Threading.Tasks;
9+
10+
namespace MassSpectrometry
11+
{
12+
/// <summary>
/// An indexing engine whose indexed peaks are <see cref="IndexedMass"/> objects: each scan is deconvoluted
/// and the resulting isotopic envelopes are binned by monoisotopic mass.
/// </summary>
public class MassIndexingEngine : IndexingEngine<IndexedMass>
{
    /// <summary> Number of bins per dalton; the bin index is the monoisotopic mass times this value, rounded </summary>
    protected override int BinsPerDalton => 1;

    /// <summary>
    /// The number of bins allocated when peaks are indexed. Envelopes whose bin index falls outside
    /// of the allocated bins are not indexed.
    /// </summary>
    public int MaxMass { get; set; } = 30000;

    public MassIndexingEngine()
    {
    }

    /// <summary>
    /// Creates a new engine and indexes the given scans with it.
    /// </summary>
    /// <param name="scanArray"> the scans to deconvolute and index </param>
    /// <param name="deconParameters"> the parameters passed to the deconvoluter </param>
    /// <returns> The populated engine, or null if indexing reported failure </returns>
    public static MassIndexingEngine? InitializeMassIndexingEngine(MsDataScan[] scanArray, DeconvolutionParameters deconParameters)
    {
        MassIndexingEngine newEngine = new();
        return newEngine.IndexPeaks(scanArray, deconParameters) ? newEngine : null;
    }

    /// <summary>
    /// Deconvolutes every scan, wraps each accepted isotopic envelope in an <see cref="IndexedMass"/> and stores it
    /// in the bin that corresponds to its rounded monoisotopic mass. Also records one ScanInfo per scan.
    /// </summary>
    /// <param name="scanArray"> the scans to deconvolute and index </param>
    /// <param name="deconParameters"> the parameters passed to the deconvoluter </param>
    /// <param name="mzRange"> optional range passed through to the deconvoluter </param>
    /// <param name="minMass"> envelopes with a monoisotopic mass below this value are skipped </param>
    /// <param name="minCharge"> envelopes with a charge below this value are skipped </param>
    /// <returns> False if the scan array is null, empty or contains only nulls, or if no bins were allocated; true otherwise </returns>
    public bool IndexPeaks(MsDataScan[] scanArray, DeconvolutionParameters deconParameters, MzRange mzRange = null, double minMass = 0, int minCharge = 1)
    {
        if (scanArray.IsNullOrEmpty() || scanArray.All(p => p == null))
            return false;

        var bins = new List<IndexedMass>[MaxMass];
        var scanInfos = new ScanInfo[scanArray.Length];
        IndexedPeaks = bins;
        ScanInfoArray = scanInfos;

        for (int scanIndex = 0; scanIndex < scanArray.Length; scanIndex++)
        {
            // NOTE(review): an array that mixes null and non-null scans passes the guard above and will
            // throw here on the first null entry - confirm whether callers can ever supply such an array.
            MsDataScan scan = scanArray[scanIndex];
            scanInfos[scanIndex] = new ScanInfo(scan.OneBasedScanNumber, scanIndex, scan.RetentionTime, scan.MsnOrder);

            foreach (var envelope in Deconvoluter.Deconvolute(scan.MassSpectrum, deconParameters, mzRange))
            {
                if (envelope.MonoisotopicMass < minMass || envelope.Charge < minCharge)
                    continue;

                int binIndex = (int)Math.Round(envelope.MonoisotopicMass * BinsPerDalton, 0);

                // Only MaxMass bins exist; without this check a mass that rounds to MaxMass or above
                // would throw IndexOutOfRangeException and abort indexing of the whole file.
                if (binIndex < 0 || binIndex >= bins.Length)
                    continue;

                bins[binIndex] ??= new List<IndexedMass>();
                bins[binIndex].Add(new IndexedMass(envelope, scan.RetentionTime, scanIndex, scan.MsnOrder));
            }
        }

        // The bin array was just allocated, so it can only be unusable when MaxMass is zero.
        return bins.Length > 0;
    }
}
55+
}

mzLib/MassSpectrometry/PeakIndexing/PeakSpline/XicSpline.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,9 @@ public void SetXicSplineXYData(ExtractedIonChromatogram xic, bool cycle = false,
5252
}
5353
else
5454
{
55-
peakRts = xic.Peaks.Select(p => p.RetentionTime).ToArray();
55+
peakRts = xic.Peaks.Select(p => (float)p.RetentionTime).ToArray();
5656
}
57-
var peakIntensities = xic.Peaks.Select(p => p.Intensity).ToArray();
57+
var peakIntensities = xic.Peaks.Select(p => (float)p.Intensity).ToArray();
5858
xic.XYData = GetXicSplineData(peakRts, peakIntensities, start, end);
5959
}
6060

0 commit comments

Comments
 (0)