Skip to content

Commit eb9869c

Browse files
RayMSMS and nbollis
authored
IsoTracker: The quantification tool for isobaric peaks in Match-between-runs (smith-chem-wisc#832)
* space * remove space * add alex project * add test xic * replace flashlfq engine * update PeakIndexingEngine * add testRawFileReader * extend the TestMzML * add Threading Test * revised FlashLFQResult and FlashLfQEngine (remove the ploty) * revise the Peptide: SetRetentionTime SetIsobaricpeptide * add the MovedIdentifications * revised ChromatographicPeak * add the tester in TestRawfileReader * Update 2/19/2025 1. Delete some unused functions in XIC and XICgroup 2. Revise the BuildIdList function in XICgroup to avoid the duplicated situation 3. FlashLFQEngine: I do the XIC reference picking after XIC group building. 4. For the FlashLFQResult: revised the writing function about the IsoDict (even when the count is 1) 5. Add the new function "SetMbrPeptide" for one peak in IsoDict 6. Add the detectionType label for IsobaricPeak 7. Add the testing for David_data * Output function updating In order to merge isopeak into isopeptide and protein group I made two functions for merging it. AddIsoPeaks: Iterate through the Dict and put the isopeak into the _Peaks RevisedPeptide: Extracted the Isopeptide into the _PeptideWithModified * Update: try to output the quantified Protein and quantified Peptide file * Update 3/6/2025: Remove the local folders, files and associated tester for PR. * Delete the parallelSearch and ThreadingTest * change the tostringTester Because I add the new row (retention time) in the peptide output file. * Delete the unused testing "SnipMzMl" * Change the XIC folder name to "IsoTracker" Output option : IsoTracker and NonIsoTracker * Fix the output problem * Update to be ready to Merge 1. Revise some variable to be more readable 2. 
Add the unit test for XIC and XIC grouped * Comment the FlashLFQ testing with local folder and files * Add the output Testing (1) Check the QuantifiedPeptide header (2) Make a searching task go through IsoTracker code * Add the tester * change * change * Add the new Tester for coverage testing, details are as follows: (1) Two snipped raw files are simulated for isobaric case (2) Pruned Psms file (only three isobaric peptides are included) (3) Checking the new Quantified file header (with RT information) (4) Checking only modified peptide can be Isobaric peptide * Add the TestFixture to avoid the codeCoverage Testing * Add TestFixture System.Diagnostics.CodeAnalysis.ExcludeFromCodeCoverage * Fixtext for the coverage * Deleted some unused package and line * Deleted the Big tester * Add the tester description * Deleted the unused package in TestRawFileReader * Some changes (1) idGroupedBySe is grouped by base sequence and mass (2) revise the code according to the comment (3)Clean up some unused code and using package (4)Add four testers for ambiguous ID * Debug (1) the "Equal" function for Identification should be used exclusively for Isotracker. So I add a private function "IsoEqual" and "IdsEqual" (2)TestOutputToCustomDirectoryAndNameMzML: clean the existing file in this directory * small commit * Fixed the conflict * testing the new branch name * Clean up the line * Debug (1) For ambiguity recognition: skip the duplicated id with the same modifiedSequence (2) Take care of the null peak in IsoPeptideDict * Clean up the code line * Resolve comment (1) equal and getHashCode for PeakRegion (2) deleted the protein group checking for IsoEqual (3) Rename the tester (4) Rearrange the tester (5) fix the type * Debug (1) Filter out the DecoyId when doing the IsoTracker (2) Improve the peptide output files, if only one peak was detected, then we don't view it like Isobaric peptide. 
* Fix the bug while the IsoTracker and MBR are on (1) IndexEngine should not be serialized when the IsoTracker is turned on * Debug (1) Avoid deserializing the indexEngine while IsoTracker is on. (2) Add the tester for combining searching options * Resolve the comment (1) Add IEquatable interface into MoveId, PeakRegion and ChromatographicPeak * Fix the version number --------- Co-authored-by: Nic Bollis <[email protected]>
1 parent c91689d commit eb9869c

21 files changed

+58008
-42
lines changed

mzLib/FlashLFQ/ChromatographicPeak.cs

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
namespace FlashLFQ
99
{
10-
public class ChromatographicPeak
10+
public class ChromatographicPeak : IEquatable<ChromatographicPeak>
1111
{
1212
public double Intensity;
1313
public double ApexRetentionTime => Apex?.IndexedPeak.RetentionTime ?? -1;
@@ -16,6 +16,8 @@ public class ChromatographicPeak
1616
public int ScanCount => IsotopicEnvelopes.Count;
1717
public double SplitRT;
1818
public readonly bool IsMbrPeak;
19+
public DetectionType DetectionType { get; set; }
20+
public double PredictedRetentionTime { get; init; }
1921
public double MbrScore;
2022
public double PpmScore { get; set; }
2123
public double IntensityScore { get; set; }
@@ -46,6 +48,33 @@ public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fi
4648
RandomRt = randomRt;
4749
}
4850

51+
/// <summary>
/// Overloaded constructor for isobaric-ambiguity peaks, i.e. a single peak that is identified by multiple identifications.
/// Chains to the single-identification constructor with a null identification and then assigns
/// <c>Identifications</c> from <paramref name="ids"/>.
/// </summary>
/// <param name="ids">All identifications attached to this peak; the list is stored as-is (not copied).</param>
/// <param name="isMbrPeak">Forwarded unchanged to the chained constructor.</param>
/// <param name="fileInfo">Forwarded unchanged to the chained constructor.</param>
/// <param name="predictedRetentionTime">Value stored in <c>PredictedRetentionTime</c>.</param>
/// <param name="detectionType">Value stored in the <c>DetectionType</c> property; defaults to <c>DetectionType.Imputed</c>.</param>
public ChromatographicPeak(List<Identification> ids, bool isMbrPeak, SpectraFileInfo fileInfo, double predictedRetentionTime, DetectionType detectionType = DetectionType.Imputed) :
    this(null, isMbrPeak, fileInfo)
{
    PredictedRetentionTime = predictedRetentionTime;
    DetectionType = detectionType; // caller-supplied value; Imputed when the argument is omitted
    // Assigned after the chained constructor has run, so this is the final value of Identifications.
    Identifications = ids;
}
65+
66+
/// <summary>
/// Creates a peak for a single identification, recording a predicted retention time and a detection type.
/// If <paramref name="detectionType"/> is <c>DetectionType.Default</c> and <paramref name="isMbrPeak"/> is true,
/// the peak is labelled <c>DetectionType.MBR</c>; in every other case the supplied value is stored unchanged.
/// </summary>
/// <param name="id">Forwarded unchanged to the chained constructor.</param>
/// <param name="isMbrPeak">Forwarded to the chained constructor; also decides whether a Default detection type becomes MBR.</param>
/// <param name="fileInfo">Forwarded unchanged to the chained constructor.</param>
/// <param name="predictedRetentionTime">Value stored in <c>PredictedRetentionTime</c>.</param>
/// <param name="detectionType">Requested detection type; defaults to <c>DetectionType.Default</c>.</param>
public ChromatographicPeak(Identification id, bool isMbrPeak, SpectraFileInfo fileInfo, double predictedRetentionTime, DetectionType detectionType = DetectionType.Default) :
    this(id, isMbrPeak, fileInfo)
{
    // A match-between-runs peak whose caller did not pick a detection type is reported as MBR.
    bool relabelAsMbr = isMbrPeak && detectionType == DetectionType.Default;
    DetectionType = relabelAsMbr ? DetectionType.MBR : detectionType;
    PredictedRetentionTime = predictedRetentionTime;
}
77+
4978
public bool Equals(ChromatographicPeak peak)
5079
{
5180
return SpectraFileInfo.Equals(peak.SpectraFileInfo)
@@ -240,7 +269,18 @@ public override string ToString()
240269

241270
sb.Append("" + NumChargeStatesObserved + "\t");
242271

243-
if (IsMbrPeak)
272+
// temporary way to distinguish between MBR, MBR_IsoTrack, IsoTrack_Ambiguous and MSMS peaks
273+
if (IsMbrPeak && DetectionType == DetectionType.IsoTrack_MBR)
274+
{
275+
sb.Append("" + "MBR_IsoTrack" + "\t");
276+
}
277+
278+
else if (IsMbrPeak && DetectionType == DetectionType.IsoTrack_Ambiguous)
279+
{
280+
sb.Append("" + "IsoTrack_Ambiguous" + "\t");
281+
}
282+
283+
else if (IsMbrPeak)
244284
{
245285
sb.Append("" + "MBR" + "\t");
246286
}

mzLib/FlashLFQ/DetectionType.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,12 @@ public enum DetectionType
44
{
55
MSMS,
66
MBR,
7+
IsoTrack_MBR, // MBR detected by IsoTrack
8+
IsoTrack_Ambiguous, // Ambiguous(more than two Id in one peak) detected by IsoTrack
79
NotDetected,
810
MSMSAmbiguousPeakfinding,
911
MSMSIdentifiedButNotQuantified,
10-
Imputed
12+
Imputed,
13+
Default // Default value, will be removed in the future
1114
}
1215
}

mzLib/FlashLFQ/FlashLFQResults.cs

Lines changed: 102 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,32 @@
55
using System.IO;
66
using System.Linq;
77
using System.Text;
8+
using FlashLFQ.IsoTracker;
89

910
namespace FlashLFQ
1011
{
1112
public class FlashLfqResults
1213
{
14+
public bool IsoTracker = false;
1315
public readonly List<SpectraFileInfo> SpectraFiles;
1416
public readonly Dictionary<string, Peptide> PeptideModifiedSequences;
1517
public readonly Dictionary<string, ProteinGroup> ProteinGroups;
1618
public readonly Dictionary<SpectraFileInfo, List<ChromatographicPeak>> Peaks;
1719
private readonly HashSet<string> _peptideModifiedSequencesToQuantify;
20+
public Dictionary<string, Dictionary<PeakRegion, List<ChromatographicPeak>>> IsobaricPeptideDict = null;
1821
public string PepResultString { get; set; }
1922
public double MbrQValueThreshold { get; set; }
2023

2124
public FlashLfqResults(List<SpectraFileInfo> spectraFiles, List<Identification> identifications, double mbrQValueThreshold = 0.05,
22-
HashSet<string> peptideModifiedSequencesToQuantify = null)
25+
HashSet<string> peptideModifiedSequencesToQuantify = null, bool isIsoTracker = false)
2326
{
2427
SpectraFiles = spectraFiles;
2528
PeptideModifiedSequences = new Dictionary<string, Peptide>();
2629
ProteinGroups = new Dictionary<string, ProteinGroup>();
2730
Peaks = new Dictionary<SpectraFileInfo, List<ChromatographicPeak>>();
2831
MbrQValueThreshold = mbrQValueThreshold;
2932
_peptideModifiedSequencesToQuantify = peptideModifiedSequencesToQuantify ?? identifications.Where(id => !id.IsDecoy).Select(id => id.ModifiedSequence).ToHashSet();
33+
IsoTracker = isIsoTracker;
3034

3135
foreach (SpectraFileInfo file in spectraFiles)
3236
{
@@ -131,9 +135,11 @@ public void CalculatePeptideResults(bool quantifyAmbiguousPeptides)
131135
{
132136
sequence.Value.SetDetectionType(file, DetectionType.NotDetected);
133137
sequence.Value.SetIntensity(file, 0);
138+
sequence.Value.SetRetentionTime(file,0);
134139
}
135140
}
136141

142+
137143
foreach (var filePeaks in Peaks)
138144
{
139145
var groupedPeaks = filePeaks.Value
@@ -142,13 +148,13 @@ public void CalculatePeptideResults(bool quantifyAmbiguousPeptides)
142148
.Where(p => !p.IsMbrPeak || (p.MbrQValue < MbrQValueThreshold && !p.RandomRt))
143149
.GroupBy(p => p.Identifications.First().ModifiedSequence)
144150
.Where(group => _peptideModifiedSequencesToQuantify.Contains(group.Key))
145-
.ToList();
151+
.ToDictionary(p => p.Key, p => p.ToList());
146152

147153
foreach (var sequenceWithPeaks in groupedPeaks)
148154
{
149155
string sequence = sequenceWithPeaks.Key;
150-
double intensity = sequenceWithPeaks.Max(p => p.Intensity);
151-
ChromatographicPeak bestPeak = sequenceWithPeaks.First(p => p.Intensity == intensity);
156+
double intensity = sequenceWithPeaks.Value.Max(p => p.Intensity);
157+
ChromatographicPeak bestPeak = sequenceWithPeaks.Value.First(p => p.Intensity == intensity);
152158
DetectionType detectionType;
153159

154160
if (bestPeak.IsMbrPeak && intensity > 0)
@@ -169,6 +175,7 @@ public void CalculatePeptideResults(bool quantifyAmbiguousPeptides)
169175
}
170176

171177
PeptideModifiedSequences[sequence].SetIntensity(filePeaks.Key, intensity);
178+
PeptideModifiedSequences[sequence].SetRetentionTime(filePeaks.Key, bestPeak.ApexRetentionTime);
172179
PeptideModifiedSequences[sequence].SetDetectionType(filePeaks.Key, detectionType);
173180
}
174181

@@ -195,6 +202,7 @@ public void CalculatePeptideResults(bool quantifyAmbiguousPeptides)
195202
if (Math.Abs(alreadyRecordedIntensity) < 0.01)
196203
{
197204
PeptideModifiedSequences[sequence].SetDetectionType(filePeaks.Key, DetectionType.MSMSAmbiguousPeakfinding);
205+
PeptideModifiedSequences[sequence].SetRetentionTime(filePeaks.Key, ambiguousPeak.ApexRetentionTime);
198206
PeptideModifiedSequences[sequence].SetIntensity(filePeaks.Key, ambiguousPeak.Intensity);
199207
}
200208
// If the peptide intensity has already been recorded, that value is retained.
@@ -207,12 +215,19 @@ public void CalculatePeptideResults(bool quantifyAmbiguousPeptides)
207215
{
208216
PeptideModifiedSequences[sequence].SetDetectionType(filePeaks.Key, DetectionType.MSMSAmbiguousPeakfinding);
209217
PeptideModifiedSequences[sequence].SetIntensity(filePeaks.Key, 0);
218+
PeptideModifiedSequences[sequence].SetRetentionTime(filePeaks.Key, ambiguousPeak.ApexRetentionTime);
210219
}
211220
}
212221
}
213222

214223
}
215224

225+
if (IsoTracker && IsobaricPeptideDict != null)
226+
{
227+
// We view each Isobaric peak as an individual peptide, so we need to add them to the peptide list
228+
RevisedModifiedPeptides();
229+
}
230+
216231
if (!quantifyAmbiguousPeptides)
217232
{
218233
HandleAmbiguityInFractions();
@@ -591,11 +606,24 @@ public void WriteResults(string peaksOutputPath, string modPeptideOutputPath, st
591606
{
592607
using (StreamWriter output = new StreamWriter(modPeptideOutputPath))
593608
{
594-
output.WriteLine(Peptide.TabSeparatedHeader(SpectraFiles));
595-
596-
foreach (var peptide in PeptideModifiedSequences.OrderBy(p => p.Key))
609+
if (IsoTracker)
610+
{
611+
output.WriteLine(Peptide.TabSeparatedHeader_IsoTracker(SpectraFiles));
612+
// we want to output with same iso group index followed by peak order.
613+
foreach (var peptide in PeptideModifiedSequences
614+
.OrderBy(p => p.Value.IsoGroupIndex ?? int.MaxValue)
615+
.ThenBy(p => p.Value.PeakOrder ?? int.MinValue))
616+
{
617+
output.WriteLine(peptide.Value.ToString(SpectraFiles, IsoTracker));
618+
}
619+
}
620+
else
597621
{
598-
output.WriteLine(peptide.Value.ToString(SpectraFiles));
622+
output.WriteLine(Peptide.TabSeparatedHeader(SpectraFiles));
623+
foreach (var peptide in PeptideModifiedSequences.OrderBy(p => p.Key))
624+
{
625+
output.WriteLine(peptide.Value.ToString(SpectraFiles, IsoTracker));
626+
}
599627
}
600628
}
601629
}
@@ -759,5 +787,71 @@ public static void MedianPolish(double[][] table, int maxIterations = 10, double
759787
sumAbsoluteResiduals = iterationSumAbsoluteResiduals;
760788
}
761789
}
790+
791+
/// <summary>
/// Rebuilds the entries of <c>PeptideModifiedSequences</c> that are covered by <c>IsobaricPeptideDict</c>.
/// For every non-empty entry, the peptides whose modified sequence appears in any of the entry's peaks are removed,
/// and one new <c>Peptide</c> is added per peak region, keyed by its own <c>Sequence</c>.
/// An entry with exactly one region yields a single peptide named by the joined sequences; an entry with several
/// regions yields peptides suffixed with " Isopeptide_peak" plus a 1-based peak number, sharing one iso group index.
/// </summary>
internal void RevisedModifiedPeptides()
{
    // Distinct modified sequences of all identifications carried by the non-null peaks, in first-seen order.
    static List<string> CollectModifiedSequences(IEnumerable<ChromatographicPeak> peaks) =>
        peaks.Where(peak => peak != null)
             .SelectMany(peak => peak.Identifications)
             .Select(identification => identification.ModifiedSequence)
             .Distinct()
             .ToList();

    int nextGroupIndex = 1;

    foreach (var entry in IsobaricPeptideDict)
    {
        var peaksByRegion = entry.Value;

        // Entries without any peak region leave the peptide list untouched.
        if (peaksByRegion.Count == 0)
        {
            continue;
        }

        // Fetch the template peptide first: it may be removed from the dictionary just below.
        Peptide template = PeptideModifiedSequences[entry.Key];

        // Drop the former peptides for every sequence that occurs in any of this entry's peaks.
        foreach (string modifiedSequence in CollectModifiedSequences(peaksByRegion.Values.SelectMany(regionPeaks => regionPeaks)))
        {
            PeptideModifiedSequences.Remove(modifiedSequence);
        }

        if (peaksByRegion.Count == 1)
        {
            // A single peak region is not reported as an isobaric group: no group index, no peak order.
            List<ChromatographicPeak> onlyRegionPeaks = peaksByRegion.Values.Single();
            string mergedName = string.Join(" | ", CollectModifiedSequences(onlyRegionPeaks));

            Peptide merged = new Peptide(mergedName, template.BaseSequence, template.UseForProteinQuant, template.ProteinGroups);
            // Per the original author's note, SetIsobaricPeptide fills retention time, intensity and detection type from the peaks.
            merged.SetIsobaricPeptide(onlyRegionPeaks);
            PeptideModifiedSequences[merged.Sequence] = merged;
            continue;
        }

        // Several peak regions: each region becomes its own peptide inside the same iso group.
        var regionPeakLists = peaksByRegion.Values.ToList();
        for (int position = 0; position < regionPeakLists.Count; position++)
        {
            int peakNumber = position + 1;
            List<ChromatographicPeak> regionPeaks = regionPeakLists[position];
            string peakName = string.Join(" | ", CollectModifiedSequences(regionPeaks)) + " Isopeptide_peak" + peakNumber;

            Peptide isoPeptide = new Peptide(peakName, template.BaseSequence, template.UseForProteinQuant, template.ProteinGroups, nextGroupIndex, peakNumber);
            // Per the original author's note, SetIsobaricPeptide fills retention time, intensity and detection type from the peaks.
            isoPeptide.SetIsobaricPeptide(regionPeaks);
            PeptideModifiedSequences[isoPeptide.Sequence] = isoPeptide;
        }

        // Only multi-region entries consume a group index.
        nextGroupIndex++;
    }
}
762856
}
763857
}

0 commit comments

Comments
 (0)