Skip to content

Commit 083ac19

Browse files
authored
Added intensity field to Sm from TSV (smith-chem-wisc#866)
* added intensity field * setter * intralink parsing tested * default to PSM if cannot find path * Test coverage up * Ensured MsDataFiles worked with new FileReader methods * Adjusted SmFromTsvReader assumption * Cleanup
1 parent bc6e75d commit 083ac19

File tree

10 files changed

+126
-29
lines changed

10 files changed

+126
-29
lines changed

mzLib/Readers/ExternalResults/ResultFiles/MsDataFileToResultFileAdapter.cs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,11 @@ public override MsDataScan GetOneBasedScanFromDynamicConnection(int oneBasedScan
2929
#region IResultFile Members
3030

3131
public new string FilePath { get; set; }
32-
public SupportedFileType FileType { get; private set; }
32+
public SupportedFileType FileType => FilePath.ParseFileType();
3333
public Software Software { get; set; } = Software.MassSpecFile;
3434
public List<MsDataScan> Results { get; set; }
3535
public void LoadResults()
3636
{
37-
FileType = FilePath.ParseFileType();
3837
_dataFile = MsDataFileReader.GetDataFile(FilePath).LoadAllStaticData();
3938
Results = _dataFile.GetAllScansList();
4039
}
@@ -51,7 +50,6 @@ public void WriteResults(string outputPath)
5150
public MsDataFileToResultFileAdapter(string filePath) : base(filePath)
5251
{
5352
FilePath = filePath;
54-
FileType = FilePath.ParseFileType();
5553
}
5654

5755
public MsDataFileToResultFileAdapter() : base("")

mzLib/Readers/FileReader.cs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,18 @@ public class FileReader
2020
/// <exception cref="MzLibException">Thrown if file type is not recognized or can't be converted to IResultFile</exception>
2121
public static IResultFile ReadResultFile(string filePath)
2222
{
23-
if (!File.Exists(filePath))
23+
if (!File.Exists(filePath) && !Directory.Exists(filePath)) // File and Directory allows Bruker's .d to also work here.
2424
throw new FileNotFoundException();
2525
var resultFileType = filePath.GetResultFileType(); // These calls can throw MzLibExceptions
26-
object resultFile = Activator.CreateInstance(resultFileType);
26+
27+
// Activator.CreateInstance requires a parameterless constructor; every class derived from Readers.ResultFile is guaranteed to have one.
28+
object? resultFile = Activator.CreateInstance(resultFileType);
2729
if (resultFile is IResultFile castResultFile)
2830
{
2931
castResultFile.FilePath = filePath;
3032
return castResultFile;
3133
}
32-
throw new MzLibException($"{resultFileType} files cannot be converted to IResultFile");
34+
throw new MzLibException($"{resultFileType} files cannot be converted to IResultFile. File path: {filePath}");
3335
}
3436

3537
/// <summary>

mzLib/Readers/InternalResults/IndividualResultRecords/SpectrumMatchFromTsv.cs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@ public abstract class SpectrumMatchFromTsv
1919
public string FileNameWithoutExtension { get; protected set; }
2020
public int PrecursorScanNum { get; protected set; }
2121
public int PrecursorCharge { get; protected set; }
22+
public double? PrecursorIntensity { get; }
2223
public double PrecursorMz { get; protected set; }
2324
public double PrecursorMass { get; protected set; }
2425
public double? RetentionTime { get; protected set; }
2526
public double Score { get; protected set; }
2627
public int SpectrumMatchCount { get; protected set; }
2728
public string Accession { get; protected set; }
2829
public double? SpectralAngle { get; protected set; }
29-
public List<MatchedFragmentIon> MatchedIons { get; protected set; }
30+
public List<MatchedFragmentIon> MatchedIons { get; set; }
3031
public Dictionary<int, List<MatchedFragmentIon>> ChildScanMatchedIons { get; protected set; }
3132
public double QValue { get; protected set; }
3233
public double PEP { get; protected set; }
@@ -95,6 +96,7 @@ protected SpectrumMatchFromTsv(string line, char[] split, Dictionary<string, int
9596
}
9697

9798
PrecursorCharge = (int)double.Parse(spl[parsedHeader[SpectrumMatchFromTsvHeader.PrecursorCharge]].Trim(), CultureInfo.InvariantCulture);
99+
// Precursor intensity is optional: it is null when the column is absent from the header
// (index < 0) or when the cell cannot be parsed as a number.
// Parse with the invariant culture, like the neighbouring precursor fields, so the result
// does not depend on the machine's locale (e.g. ',' as the decimal separator).
PrecursorIntensity = parsedHeader[SpectrumMatchFromTsvHeader.PrecursorIntensity] >= 0
    && double.TryParse(
        spl[parsedHeader[SpectrumMatchFromTsvHeader.PrecursorIntensity]].Trim(),
        NumberStyles.Float | NumberStyles.AllowThousands, // same styles the two-argument TryParse overload uses
        CultureInfo.InvariantCulture,
        out double precursorIntensity)
    ? precursorIntensity
    : null;
98100
PrecursorMz = double.Parse(spl[parsedHeader[SpectrumMatchFromTsvHeader.PrecursorMz]].Trim(), CultureInfo.InvariantCulture);
99101
PrecursorMass = double.Parse(spl[parsedHeader[SpectrumMatchFromTsvHeader.PrecursorMass]].Trim(), CultureInfo.InvariantCulture);
100102
BaseSeq = RemoveParentheses(spl[parsedHeader[SpectrumMatchFromTsvHeader.BaseSequence]].Trim());
@@ -193,6 +195,7 @@ protected SpectrumMatchFromTsv(SpectrumMatchFromTsv psm, string fullSequence, in
193195
FileNameWithoutExtension = psm.FileNameWithoutExtension;
194196
PrecursorScanNum = psm.PrecursorScanNum;
195197
PrecursorCharge = psm.PrecursorCharge;
198+
PrecursorIntensity = psm.PrecursorIntensity;
196199
Score = psm.Score;
197200
MatchedIons = psm.MatchedIons.ToList();
198201
ChildScanMatchedIons = psm.ChildScanMatchedIons;

mzLib/Readers/InternalResults/IndividualResultRecords/SpectrumMatchFromTsvHeader.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ public class SpectrumMatchFromTsvHeader
1212
public const string TotalIonCurrent = "Total Ion Current";
1313
public const string PrecursorScanNum = "Precursor Scan Number";
1414
public const string PrecursorCharge = "Precursor Charge";
15+
public const string PrecursorIntensity = "Precursor Intensity";
1516
public const string PrecursorMz = "Precursor MZ";
1617
public const string PrecursorMass = "Precursor Mass";
1718
public const string Score = "Score";

mzLib/Readers/InternalResults/ResultFiles/SpectrumMatchTsvReader.cs

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ public static List<SpectrumMatchFromTsv> ReadTsv(string filePath, out List<strin
3333

3434
string line;
3535
Dictionary<string, int> parsedHeader = null;
36+
MzLibException? parsingException = null;
3637

37-
var fileType = filePath.ParseFileType();
3838
while (reader.Peek() > 0)
3939
{
4040
lineCount++;
@@ -49,20 +49,31 @@ public static List<SpectrumMatchFromTsv> ReadTsv(string filePath, out List<strin
4949

5050
try
5151
{
52-
switch (filePath.ParseFileType())
52+
SupportedFileType type;
53+
try
54+
{
55+
type = filePath.ParseFileType();
56+
}
57+
catch (MzLibException e)
58+
{
59+
// if the parsing fails due to file path not being in the correct format, assume Psm reader will work.
60+
parsingException = e;
61+
type = SupportedFileType.psmtsv;
62+
}
63+
64+
switch (type)
5365
{
5466
case SupportedFileType.osmtsv:
5567
psms.Add(new OsmFromTsv(line, Split, parsedHeader));
5668
break;
5769

5870
case SupportedFileType.psmtsv:
59-
case SupportedFileType.IntralinkResults:
6071
default:
6172
psms.Add(new PsmFromTsv(line, Split, parsedHeader));
6273
break;
6374
}
6475
}
65-
catch (Exception e)
76+
catch (Exception)
6677
{
6778
warnings.Add("Could not read line: " + lineCount);
6879
}
@@ -75,6 +86,13 @@ public static List<SpectrumMatchFromTsv> ReadTsv(string filePath, out List<strin
7586
warnings.Add("Warning: " + (lineCount - 1 - psms.Count) + " PSMs were not read.");
7687
}
7788

89+
// if we could not parse type, we assumed PSMs were in the file.
90+
// We were wrong and need to throw.
91+
if (parsingException is not null && psms.Count == 0)
92+
{
93+
throw new MzLibException($"No spectral matches found in file: {filePath}", parsingException);
94+
}
95+
7896
return psms;
7997
}
8098

@@ -108,6 +126,7 @@ public static Dictionary<string, int> ParseHeader(string header)
108126
parsedHeader.Add(SpectrumMatchFromTsvHeader.TotalIonCurrent, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.TotalIonCurrent));
109127
parsedHeader.Add(SpectrumMatchFromTsvHeader.PrecursorScanNum, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PrecursorScanNum));
110128
parsedHeader.Add(SpectrumMatchFromTsvHeader.PrecursorCharge, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PrecursorCharge));
129+
parsedHeader.Add(SpectrumMatchFromTsvHeader.PrecursorIntensity, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PrecursorIntensity));
111130
parsedHeader.Add(SpectrumMatchFromTsvHeader.PrecursorMz, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PrecursorMz));
112131
parsedHeader.Add(SpectrumMatchFromTsvHeader.PrecursorMass, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PrecursorMass));
113132
parsedHeader.Add(SpectrumMatchFromTsvHeader.Score, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.Score));

mzLib/Readers/Util/SupportedFileTypes.cs

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ public enum SupportedFileType
1313
MzML,
1414
Mgf,
1515
psmtsv,
16-
IntralinkResults,
1716
osmtsv,
1817
ToppicPrsm,
1918
ToppicPrsmSingle,
@@ -56,7 +55,6 @@ public static string GetFileExtension(this SupportedFileType type)
5655
SupportedFileType.BrukerTimsTof => ".d",
5756
SupportedFileType.psmtsv => ".psmtsv",
5857
SupportedFileType.osmtsv => ".osmtsv",
59-
SupportedFileType.IntralinkResults => "Intralinks.tsv",
6058
SupportedFileType.ToppicPrsm => "_prsm.tsv",
6159
SupportedFileType.ToppicPrsmSingle => "_prsm_single.tsv",
6260
SupportedFileType.ToppicProteoform => "_proteoform.tsv",
@@ -88,8 +86,14 @@ public static SupportedFileType ParseFileType(this string filePath)
8886
if (fileList.Any(file => file == "analysis.tdf"))
8987
return SupportedFileType.BrukerTimsTof;
9088
throw new MzLibException("Bruker file type not recognized");
91-
case ".psmtsv": return SupportedFileType.psmtsv;
92-
case ".osmtsv": return SupportedFileType.osmtsv;
89+
90+
case ".psmtsv":
91+
case ".tsv" when filePath.Contains("Intralinks"):
92+
return SupportedFileType.psmtsv;
93+
94+
case ".osmtsv":
95+
return SupportedFileType.osmtsv;
96+
9397
case ".feature":
9498
if (filePath.EndsWith(SupportedFileType.Ms1Feature.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase))
9599
return SupportedFileType.Ms1Feature;
@@ -131,8 +135,6 @@ public static SupportedFileType ParseFileType(this string filePath)
131135
return SupportedFileType.MsPathFinderTAllResults;
132136
if(filePath.EndsWith(SupportedFileType.ExperimentAnnotation.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase))
133137
return SupportedFileType.ExperimentAnnotation;
134-
if (filePath.EndsWith(SupportedFileType.IntralinkResults.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase))
135-
return SupportedFileType.IntralinkResults;
136138

137139
// these tsv cases are just .tsv and need an extra step to determine the type
138140
// currently need to distinguish between FlashDeconvTsv and MsFraggerPsm
@@ -173,7 +175,6 @@ public static Type GetResultFileType(this SupportedFileType type)
173175
SupportedFileType.Tsv_FlashDeconv => typeof(FlashDeconvTsvFile),
174176
SupportedFileType.psmtsv => typeof(PsmFromTsvFile),
175177
SupportedFileType.osmtsv => typeof(OsmFromTsvFile),
176-
SupportedFileType.IntralinkResults => typeof(PsmFromTsvFile),
177178
SupportedFileType.ToppicPrsm => typeof(ToppicSearchResultFile),
178179
SupportedFileType.ToppicPrsmSingle => typeof(ToppicSearchResultFile),
179180
SupportedFileType.ToppicProteoform => typeof(ToppicSearchResultFile),
@@ -187,6 +188,11 @@ public static Type GetResultFileType(this SupportedFileType type)
187188
SupportedFileType.MsPathFinderTAllResults => typeof(MsPathFinderTResultFile),
188189
SupportedFileType.CruxResult => typeof(CruxResultFile),
189190
SupportedFileType.ExperimentAnnotation => typeof(ExperimentAnnotationFile),
191+
SupportedFileType.ThermoRaw => typeof(MsDataFileToResultFileAdapter),
192+
SupportedFileType.MzML => typeof(MsDataFileToResultFileAdapter),
193+
SupportedFileType.Mgf => typeof(MsDataFileToResultFileAdapter),
194+
SupportedFileType.BrukerD => typeof(MsDataFileToResultFileAdapter),
195+
SupportedFileType.BrukerTimsTof => typeof(MsDataFileToResultFileAdapter),
190196
_ => throw new MzLibException("File type not supported")
191197
};
192198
}

0 commit comments

Comments
 (0)