Skip to content

Commit 5d4b3a0

Browse files
Readers: Modification Conversion (smith-chem-wisc#853)
* Moved Quantifyable to Readers namespace * Modification Embedding and Conversion * Implemented ISpectralMatch on MSPT * MSPT Mod Tests * Tested Conversion Code. * Address shortreed comments. * Switched over to direct MM Mod Parsing * Cleanup * Responded to review comments * once again * Fixed file name spelling * fix namespace issue --------- Co-authored-by: Alexander-Sol <[email protected]>
1 parent 083ac19 commit 5d4b3a0

23 files changed

+201181
-150
lines changed

mzLib/Omics/BioPolymerWithSetModsExtensions.cs

Lines changed: 2 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -108,39 +108,6 @@ public static string EssentialSequence(this IBioPolymerWithSetMods withSetMods,
108108
return essentialSequence;
109109
}
110110

111-
/// <summary>
112-
/// Determines the full sequence of a BioPolymerWithSetMods from its base sequence and modifications
113-
/// </summary>
114-
/// <param name="withSetMods"></param>
115-
/// <returns></returns>
116-
public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods)
117-
{
118-
// start string builder with initial capacity to avoid resizing costs.
119-
var subSequence = new StringBuilder(withSetMods.BaseSequence.Length + withSetMods.AllModsOneIsNterminus.Count * 30);
120-
121-
// modification on peptide N-terminus
122-
if (withSetMods.AllModsOneIsNterminus.TryGetValue(1, out Modification? mod))
123-
{
124-
subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
125-
}
126-
127-
for (int r = 0; r < withSetMods.Length; r++)
128-
{
129-
subSequence.Append(withSetMods[r]);
130-
131-
// modification on this residue
132-
if (withSetMods.AllModsOneIsNterminus.TryGetValue(r + 2, out mod))
133-
{
134-
subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
135-
}
136-
}
137-
138-
// modification on peptide C-terminus
139-
if (withSetMods.AllModsOneIsNterminus.TryGetValue(withSetMods.Length + 2, out mod))
140-
{
141-
subSequence.Append($"[{mod.ModificationType}:{mod.IdWithMotif}]");
142-
}
143-
144-
return subSequence.ToString();
145-
}
111+
/// <summary>
/// Determines the full sequence of a BioPolymerWithSetMods by passing its
/// <c>BaseSequence</c> and <c>AllModsOneIsNterminus</c> to the static
/// <c>IBioPolymerWithSetMods.DetermineFullSequence</c> overload.
/// </summary>
/// <param name="withSetMods">The biopolymer whose full sequence is requested.</param>
/// <returns>The string produced by the static overload.</returns>
public static string DetermineFullSequence(this IBioPolymerWithSetMods withSetMods)
{
    return IBioPolymerWithSetMods.DetermineFullSequence(
        withSetMods.BaseSequence,
        withSetMods.AllModsOneIsNterminus);
}
146113
}

mzLib/Omics/IBioPolymerWithSetMods.cs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,5 +163,39 @@ public static Dictionary<int, Modification> GetModificationDictionaryFromFullSeq
163163
/// <returns></returns>
164164
public static List<Modification> GetModificationsFromFullSequence(string fullSequence,
165165
Dictionary<string, Modification> allModsKnown) => [.. GetModificationDictionaryFromFullSequence(fullSequence, allModsKnown).Values];
166+
167+
/// <summary>
/// Builds the full sequence string from a base sequence and its modifications.
/// Every modification found is written as "[ModificationType:IdWithMotif]":
/// key 1 is written before the first residue, key r + 2 directly after the residue
/// at zero-based index r, and key Length + 2 after the final residue.
/// </summary>
/// <param name="baseSequence">The unmodified residue string.</param>
/// <param name="allModsOneIsNterminus">Modifications keyed by position, where 1 is the N-terminus.</param>
/// <returns>The base sequence with modification annotations inserted.</returns>
public static string DetermineFullSequence(string baseSequence, IDictionary<int, Modification> allModsOneIsNterminus)
{
    int residueCount = baseSequence.Length;

    // Pre-size the builder so typical annotations do not force a resize.
    var fullSequence = new StringBuilder(residueCount + allModsOneIsNterminus.Count * 60);

    // Writes the annotation for the modification stored under the given key, if there is one.
    void AppendModificationAt(int key)
    {
        if (allModsOneIsNterminus.TryGetValue(key, out Modification? found))
        {
            fullSequence.Append($"[{found.ModificationType}:{found.IdWithMotif}]");
        }
    }

    // N-terminal modification
    AppendModificationAt(1);

    for (int index = 0; index < residueCount; index++)
    {
        fullSequence.Append(baseSequence[index]);

        // modification on this residue
        AppendModificationAt(index + 2);
    }

    // C-terminal modification
    AppendModificationAt(residueCount + 2);

    return fullSequence.ToString();
}
166200
}
167201
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
using Omics.Modifications;
2+
3+
namespace Readers;
4+
5+
/// <summary>
/// Read-only view of a single spectral match, exposing its scan, sequence,
/// accession, decoy status, source file and modifications.
/// </summary>
public interface ISpectralMatch
{
    /// <summary>
    /// Scan number of the identification (one-based)
    /// </summary>
    int OneBasedScanNumber { get; }

    /// <summary>
    /// Primary sequence, without modifications
    /// </summary>
    string BaseSequence { get; }

    /// <summary>
    /// Modified sequence written in MetaMorpheus format
    /// </summary>
    string FullSequence { get; }

    /// <summary>
    /// Accession (unique identifier) of the identification
    /// </summary>
    string Accession { get; }

    /// <summary>
    /// True when this spectral match is a decoy
    /// </summary>
    bool IsDecoy { get; }

    /// <summary>
    /// Name of the mass spec file, extension removed
    /// </summary>
    string FileNameWithoutExtension { get; }

    /// <summary>
    /// Modifications carried by the spectral match, keyed by position
    /// </summary>
    Dictionary<int, Modification> AllModsOneIsNterminus { get; }
}

mzLib/Readers/BaseClasses/ResultFile.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ public bool CanRead(string filePath)
8181

8282
#region Operators
8383

84+
/// <summary>
/// Gets or sets the result stored at <paramref name="index"/> in <c>Results</c>.
/// </summary>
/// <param name="index">Position within <c>Results</c>.</param>
public TResult this[int index]
{
    get
    {
        return Results[index];
    }
    set
    {
        Results[index] = value;
    }
}
89+
8490
public static ResultFile<TResult> operator +(ResultFile<TResult> thisFile, TResult resultToAdd)
8591
{
8692
thisFile.Results.Add(resultToAdd);

mzLib/Readers/ExternalResults/IndividualResultRecords/MsPathFinderTResult.cs

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
using CsvHelper.Configuration.Attributes;
22
using CsvHelper.Configuration;
3-
using System;
4-
using System.Collections.Generic;
5-
using System.Linq;
6-
using System.Text;
7-
using System.Threading.Tasks;
83
using Chemistry;
4+
using Omics.Modifications;
5+
using Omics;
96

107
namespace Readers
118
{
12-
public class MsPathFinderTResult
9+
public class MsPathFinderTResult : ISpectralMatch
1310
{
1411
public static CsvConfiguration CsvConfiguration { get; } = new CsvConfiguration(System.Globalization.CultureInfo.InvariantCulture)
1512
{
@@ -20,7 +17,6 @@ public class MsPathFinderTResult
2017
BadDataFound = null,
2118
};
2219

23-
2420
[Name("Scan")]
2521
public int OneBasedScanNumber { get; set; }
2622

@@ -89,13 +85,40 @@ public class MsPathFinderTResult
8985

9086
#region InterpretedFields
9187

92-
[Ignore] private string _accession = null;
88+
[Ignore] private string? _accession = null;
9389
[Ignore] public string Accession => _accession ??= ProteinName.Split('|')[1].Trim();
9490

9591
[Ignore] private bool? _isDecoy = null;
9692
[Ignore] public bool IsDecoy => _isDecoy ??= ProteinName.StartsWith("XXX");
9793
[Optional] public string FileNameWithoutExtension { get; set; }
9894

95+
[Ignore] private Dictionary<int, Modification>? _allModsOneIsNterminus;
96+
[Ignore] public Dictionary<int, Modification> AllModsOneIsNterminus => _allModsOneIsNterminus ??= ParseModifications();
97+
98+
[Ignore] private string? _fullSequence;
99+
[Ignore] public string FullSequence => _fullSequence ??= IBioPolymerWithSetMods.DetermineFullSequence(BaseSequence, AllModsOneIsNterminus);
100+
101+
/// <summary>
/// Parses the <c>Modifications</c> column into a position-keyed dictionary.
/// The column is read as a comma-separated list of "name location" entries. A location of 0
/// is stored under key 1 and looked up with the placeholder residue 'X'; any other location
/// is stored under key location + 1 and looked up with <c>BaseSequence[location - 1]</c>.
/// </summary>
/// <returns>
/// The parsed modifications, or an empty dictionary when <c>Modifications</c> is null or empty.
/// </returns>
/// <exception cref="System.IndexOutOfRangeException">An entry has no location part, or the location lies outside <c>BaseSequence</c>.</exception>
/// <exception cref="System.FormatException">The location part of an entry is not an integer.</exception>
/// <exception cref="System.ArgumentException">Two entries resolve to the same position.</exception>
private Dictionary<int, Modification> ParseModifications()
{
    var mods = new Dictionary<int, Modification>();
    if (string.IsNullOrEmpty(Modifications))
    {
        return mods;
    }

    // Trim and drop empty pieces so padding around commas or doubled spaces
    // does not produce an empty name or a missing location.
    var modStrings = Modifications.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    foreach (var modString in modStrings)
    {
        var modSplits = modString.Split(' ', StringSplitOptions.RemoveEmptyEntries);
        var name = modSplits[0];

        // The file is machine-written, so parse independently of the current culture.
        var location = int.Parse(modSplits[1], System.Globalization.CultureInfo.InvariantCulture);

        // Location 0 has no residue of its own; 'X' is passed as the residue for the lookup.
        var modifiedResidue = location == 0 ? 'X' : BaseSequence[location - 1];

        // NOTE(review): GetClosestMod is defined elsewhere; presumably it maps the external
        // mod name and residue to the nearest MetaMorpheus modification — confirm.
        var mmMod = ModificationConverter.GetClosestMod(name, modifiedResidue);

        mods.Add(location + 1, mmMod);
    }

    return mods;
}
121+
99122
#endregion
100123
}
101124
}

mzLib/Readers/ExternalResults/ResultFiles/MsPathFinderTResultFile.cs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,4 @@
11
using CsvHelper;
2-
using System;
3-
using System.Collections.Generic;
4-
using System.Linq;
5-
using System.Text;
6-
using System.Threading.Tasks;
72
using Easy.Common.Extensions;
83

94
namespace Readers
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
using CsvHelper.Configuration;
2+
using CsvHelper.TypeConversion;
3+
using CsvHelper;
4+
using System.Text;
5+
using MzLibUtil;
6+
7+
namespace Readers;
8+
9+
/// <summary>
/// Converts the chemical formula from MsPathFinderT to MetaMorpheus
/// MsPathFinderT: "C(460) H(740) N(136) O(146) S(0)"
/// MetaMorpheus: "C460H740N136O146S"
/// </summary>
internal class MsPathFinderTCompositionToChemicalFormulaConverter : DefaultTypeConverter
{
    /// <summary>
    /// Reads a space-separated list of "Element(count)" tokens and adds each one to a new
    /// <see cref="Chemistry.ChemicalFormula"/>.
    /// </summary>
    /// <param name="text">The composition text from the file; null or blank gives an empty formula.</param>
    /// <param name="row">The current reader row (unused).</param>
    /// <param name="memberMapData">Mapping metadata (unused).</param>
    /// <returns>The chemical formula built from the tokens.</returns>
    /// <exception cref="System.IndexOutOfRangeException">A token contains no '('.</exception>
    /// <exception cref="System.FormatException">A token's count is not an integer.</exception>
    public override object ConvertFromString(string text, IReaderRow row, MemberMapData memberMapData)
    {
        var chemicalFormula = new Chemistry.ChemicalFormula();
        if (string.IsNullOrWhiteSpace(text))
        {
            return chemicalFormula;
        }

        foreach (var element in text.Split(' ', StringSplitOptions.RemoveEmptyEntries))
        {
            var elementSplit = element.Split('(');
            var elementName = elementSplit[0];

            // Counts are machine-written, so parse independently of the current culture.
            var elementCount = int.Parse(elementSplit[1].Replace(")", ""), System.Globalization.CultureInfo.InvariantCulture);
            chemicalFormula.Add(elementName, elementCount);
        }

        return chemicalFormula;
    }

    /// <summary>
    /// Writes a <see cref="Chemistry.ChemicalFormula"/> as space-separated "Element(count)" tokens.
    /// An element with no digits after it in <c>Formula</c> is written with a count of 1, so that
    /// every token can be read back by <see cref="ConvertFromString"/>.
    /// </summary>
    /// <param name="value">The value to write; must be a <see cref="Chemistry.ChemicalFormula"/>.</param>
    /// <param name="row">The current writer row (unused).</param>
    /// <param name="memberMapData">Mapping metadata (unused).</param>
    /// <returns>The MsPathFinderT-style composition, or an empty string for an empty formula.</returns>
    /// <exception cref="MzLibException"><paramref name="value"/> is not a ChemicalFormula.</exception>
    public override string ConvertToString(object value, IWriterRow row, MemberMapData memberMapData)
    {
        var chemicalFormula = value as Chemistry.ChemicalFormula ?? throw new MzLibException("Cannot convert input to ChemicalFormula");

        var formula = chemicalFormula.Formula;
        if (string.IsNullOrEmpty(formula))
        {
            // Nothing to write; also avoids inspecting the last character of an empty string.
            return string.Empty;
        }

        var sb = new StringBuilder(formula.Length * 2);
        bool onNumber = false;
        foreach (var character in formula)
        {
            if (char.IsDigit(character))
            {
                if (!onNumber)
                {
                    sb.Append('(');
                    onNumber = true;
                }

                sb.Append(character);
                continue;
            }

            if (onNumber)
            {
                // A count just ended: close it and start the next token.
                sb.Append(") ");
                onNumber = false;
            }
            else if (char.IsUpper(character) && sb.Length > 0)
            {
                // An upper-case letter directly after letters starts a new element, so the
                // previous element had no digits: write its count of 1 explicitly.
                sb.Append("(1) ");
            }

            // NOTE(review): characters other than letters and digits (e.g. a sign or isotope
            // markup, if Formula can contain them) are copied through as-is — confirm.
            sb.Append(character);
        }

        // Close the trailing count, or give the final element its count of 1.
        sb.Append(onNumber ? ")" : "(1)");

        return sb.ToString();
    }
}
69+

0 commit comments

Comments
 (0)