Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MetaMorpheus/CMD/CMD.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
<PackageReference Include="Microsoft.ML.CpuMath" Version="3.0.1" />
<PackageReference Include="Microsoft.ML.FastTree" Version="3.0.1" />
<PackageReference Include="Microsoft.NETCore.App" Version="2.2.8" />
<PackageReference Include="mzLib" Version="1.0.569" />
<PackageReference Include="mzLib" Version="5.3.1" />
<PackageReference Include="Nett" Version="0.15.0" />
<PackageReference Include="SQLite.Interop.dll" Version="1.0.103" />
<PackageReference Include="System.Data.SQLite" Version="1.0.118" />
Expand Down
1 change: 1 addition & 0 deletions MetaMorpheus/CMD/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using EngineLayer.DatabaseLoading;
using Omics.Modifications;
using TaskLayer;

Expand Down
341 changes: 341 additions & 0 deletions MetaMorpheus/EngineLayer/DatabaseLoading/DatabaseLoadingEngine.cs

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions MetaMorpheus/EngineLayer/DatabaseLoading/DbForTask.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#nullable enable
namespace EngineLayer.DatabaseLoading;

/// <summary>
/// Describes a single database file supplied to a task: where it lives, whether it is a
/// contaminant database or a spectral library, and which identifier marks its decoy entries.
/// </summary>
public class DbForTask
{
    /// <summary>
    /// Creates a description of the database file at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path to the database file; stored as-is in <see cref="FilePath"/>.</param>
    /// <param name="isContaminant">True when the file is a contaminant database.</param>
    /// <param name="decoyIdentifier">
    /// Identifier used for decoy entries in this database. When null, empty, or whitespace,
    /// <c>GlobalVariables.DecoyIdentifier</c> is used instead.
    /// </param>
    public DbForTask(string filePath, bool isContaminant, string? decoyIdentifier = null)
    {
        FilePath = filePath;
        IsContaminant = isContaminant;
        FileName = System.IO.Path.GetFileName(filePath);

        // Case-insensitive ordinal comparison instead of lower-casing the extension first;
        // the static Equals also tolerates a null extension.
        IsSpectralLibrary = string.Equals(
            GlobalVariables.GetFileExtension(filePath),
            ".msp",
            System.StringComparison.OrdinalIgnoreCase);

        // A blank identifier (e.g. a text cell cleared by the user) cannot distinguish decoys from
        // targets, so treat it like "not specified" and fall back to the global default.
        // NOTE(review): assumes consumers match the identifier as a prefix/substring of accessions - confirm.
        DecoyIdentifier = string.IsNullOrWhiteSpace(decoyIdentifier)
            ? GlobalVariables.DecoyIdentifier
            : decoyIdentifier;
    }

    /// <summary>True when the file extension is ".msp" (compared case-insensitively).</summary>
    public bool IsSpectralLibrary { get; }

    /// <summary>The path passed to the constructor.</summary>
    public string FilePath { get; }

    /// <summary>Whether this database was flagged as a contaminant database.</summary>
    public bool IsContaminant { get; }

    /// <summary>File name portion of <see cref="FilePath"/>.</summary>
    public string FileName { get; }

    /// <summary>Decoy identifier for this database; never null.</summary>
    public string DecoyIdentifier { get; }

    /// <summary>Null until assigned from within the assembly (presumably after the database is loaded - confirm).</summary>
    public int? BioPolymerCount { get; internal set; } = null;

    /// <summary>Null until assigned from within the assembly (presumably after the database is loaded - confirm).</summary>
    public int? TargetCount { get; internal set; } = null;

    /// <summary>Null until assigned from within the assembly (presumably after the database is loaded - confirm).</summary>
    public int? DecoyCount { get; internal set; } = null;
}
2 changes: 1 addition & 1 deletion MetaMorpheus/EngineLayer/EngineLayer.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
<PackageReference Include="Microsoft.ML.CpuMath" Version="3.0.1" />
<PackageReference Include="Microsoft.ML.FastTree" Version="3.0.1" />
<PackageReference Include="Microsoft.NETCore.App" Version="2.2.8" />
<PackageReference Include="mzLib" Version="1.0.569" />
<PackageReference Include="mzLib" Version="5.3.1" />
<PackageReference Include="NETStandard.Library" Version="2.0.3" />
<PackageReference Include="Nett" Version="0.15.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
Expand Down
1 change: 1 addition & 0 deletions MetaMorpheus/EngineLayer/GlobalVariables.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace EngineLayer
{
public static class GlobalVariables
{
public static string DecoyIdentifier { get; set; } = "DECOY";
// for now, these are only used for error-checking in the command-line version.
// compressed versions of the protein databases (e.g., .xml.gz) are also supported
public static List<string> AcceptedDatabaseFormats { get; private set; }
Expand Down
12 changes: 10 additions & 2 deletions MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public class GptmdEngine : MetaMorpheusEngine
private readonly Dictionary<string, Tolerance> FilePathToPrecursorMassTolerance; // this exists because of file-specific tolerances
//The ScoreTolerance property is used to differentiate when a PTM candidate is added to a peptide. We check the score at each position and then add that mod where the score is highest.
private readonly double ScoreTolerance = 0.1;
private static readonly double QValueNotchThreshold = 0.05;
public Dictionary<string, HashSet<Tuple<int, Modification>>> ModDictionary { get; init; }
private readonly List<IGptmdFilter> Filters;

Expand Down Expand Up @@ -84,7 +85,9 @@ protected override MetaMorpheusEngineResults RunSpecific()
int modsAdded = 0;

int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile;
var psms = AllIdentifications.Where(b => b.FdrInfo.QValueNotch <= 0.05 && !b.IsDecoy).ToList();

// Keep psms if they are below q value notch threshold, or notch ambiguous results with hypothesis below threshold.
var psms = AllIdentifications.Where(b => b.FdrInfo.QValueNotch <= QValueNotchThreshold || (Math.Abs(b.FdrInfo.QValueNotch - 2) < 0.001 && b.BestMatchingBioPolymersWithSetMods.Any(p => p.QValueNotch.HasValue && p.QValueNotch <= QValueNotchThreshold))).ToList();
if (psms.Any() == false)
{
return new GptmdResults(this, ModDictionary, 0);
Expand All @@ -106,9 +109,14 @@ protected override MetaMorpheusEngineResults RunSpecific()
var peptideTheorProducts = new List<Product>();
List<(int site, Modification mod, string proteinAccession)> bestMatches = [];

foreach (var pepWithSetMods in psm.BestMatchingBioPolymersWithSetMods.Select(v => v.SpecificBioPolymer))
foreach (var hypothesis in psm.BestMatchingBioPolymersWithSetMods.Select(v => v))
{
// skip hypotheses that are above the q value notch threshold - will be null unless psm is notch ambiguous
if (hypothesis.QValueNotch.HasValue && hypothesis.QValueNotch > QValueNotchThreshold)
continue;

bestMatches.Clear();
var pepWithSetMods = hypothesis.SpecificBioPolymer;
var isVariantProtein = pepWithSetMods.Parent != pepWithSetMods.Parent.ConsensusVariant;
var possibleModifications = GetPossibleMods(precursorMass, GptmdModifications, Combos,
FilePathToPrecursorMassTolerance[fileName], pepWithSetMods);
Expand Down
3 changes: 3 additions & 0 deletions MetaMorpheus/EngineLayer/MetaMorpheusEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using Transcriptomics.Digestion;

namespace EngineLayer
Expand Down Expand Up @@ -289,6 +290,8 @@ public MetaMorpheusEngineResults Run()
return myResults;
}

/// <summary>
/// Queues <see cref="Run"/> via <see cref="Task.Run{TResult}(System.Func{TResult})"/> and returns the
/// task that completes with the engine results.
/// </summary>
/// <returns>A task whose result is the value returned by <see cref="Run"/>.</returns>
public Task<MetaMorpheusEngineResults> RunAsync()
{
    return Task.Run(() => Run());
}

/// <summary>
/// Changes the name of the analytes from "peptide" to "proteoform" or "oligo" if the protease is set to top-down
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ public class ProteinScoringAndFdrEngine : MetaMorpheusEngine
private readonly bool TreatModPeptidesAsDifferentPeptides;
private readonly bool MergeIndistinguishableProteinGroups;
private readonly List<ProteinGroup> ProteinGroups;
private readonly HashSet<string> _decoyIdentifiers;

public ProteinScoringAndFdrEngine(List<ProteinGroup> proteinGroups, List<SpectralMatch> newPsms, bool noOneHitWonders, bool treatModPeptidesAsDifferentPeptides, bool mergeIndistinguishableProteinGroups, CommonParameters commonParameters, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, List<string> nestedIds) : base(commonParameters, fileSpecificParameters, nestedIds)
{
Expand All @@ -20,6 +21,7 @@ public ProteinScoringAndFdrEngine(List<ProteinGroup> proteinGroups, List<Spectra
NoOneHitWonders = noOneHitWonders;
TreatModPeptidesAsDifferentPeptides = treatModPeptidesAsDifferentPeptides;
MergeIndistinguishableProteinGroups = mergeIndistinguishableProteinGroups;
_decoyIdentifiers = proteinGroups.SelectMany(p => p.Proteins.Where(b => b.IsDecoy).Select(b => b.Accession.Split('_')[0])).ToHashSet();
}

protected override MetaMorpheusEngineResults RunSpecific()
Expand All @@ -31,9 +33,12 @@ protected override MetaMorpheusEngineResults RunSpecific()
return myAnalysisResults;
}

private static string StripDecoyIdentifier(string proteinGroupName) //we're keeping only the better scoring protein group for each target/decoy pair. to do that we need to strip decoy from the name temporarily. this is the "top-picked" method
private static string StripDecoyIdentifier(string proteinGroupName, HashSet<string> decoyIdentifiers) //we're keeping only the better scoring protein group for each target/decoy pair. to do that we need to strip decoy from the name temporarily. this is the "top-picked" method
{
return proteinGroupName.Contains("DECOY_") ? proteinGroupName.Replace("DECOY_", "") : proteinGroupName;
foreach (var ident in decoyIdentifiers.Where(proteinGroupName.Contains))
return proteinGroupName.Replace($"{ident}_", "");

return proteinGroupName;
}

private void ScoreProteinGroups(List<ProteinGroup> proteinGroups, IEnumerable<SpectralMatch> psmList)
Expand Down Expand Up @@ -142,7 +147,7 @@ private List<ProteinGroup> DoProteinFdr(List<ProteinGroup> proteinGroups)
{
foreach (var protein in pg.Proteins)
{
string stippedAccession = StripDecoyIdentifier(protein.Accession); //remove "DECOY_" from the accession
string stippedAccession = StripDecoyIdentifier(protein.Accession, _decoyIdentifiers); //remove "DECOY_" from the accession

if (accessionToProteinGroup.TryGetValue(stippedAccession, out List<ProteinGroup> groups))
{
Expand Down
35 changes: 31 additions & 4 deletions MetaMorpheus/GUI/ForDisplayingInDataGrids/ProteinDbForDataGrid.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
using System.IO;
using EngineLayer;
using EngineLayer.DatabaseLoading;
using GuiFunctions;
using TaskLayer;

namespace MetaMorpheusGUI
{
public class ProteinDbForDataGrid
public class ProteinDbForDataGrid : BaseViewModel
{
#region Public Constructors

Expand All @@ -27,17 +30,41 @@ public ProteinDbForDataGrid(DbForTask uu)
Contaminant = uu.IsContaminant;
FilePath = uu.FilePath;
FileName = uu.FileName;
DecoyIdentifier = uu.DecoyIdentifier;
}

#endregion Public Constructors

#region Public Properties

public bool Use { get; set; }
public bool Contaminant { get; set; }
private bool _use;
private bool _isContaminant;
private bool _inProgress;
private string _decoyIdentifier = GlobalVariables.DecoyIdentifier;

/// <summary>
/// Whether this database is selected for use. Setting the value stores it and raises a
/// property-changed notification for <see cref="Use"/>.
/// </summary>
public bool Use
{
    get { return _use; }
    set
    {
        _use = value;
        OnPropertyChanged(nameof(Use));
    }
}
/// <summary>
/// Whether this database is flagged as a contaminant database. Setting the value stores it and
/// raises a property-changed notification for <see cref="Contaminant"/>.
/// </summary>
public bool Contaminant
{
    get { return _isContaminant; }
    set
    {
        _isContaminant = value;
        OnPropertyChanged(nameof(Contaminant));
    }
}
public string FileName { get; private set; }
public string FilePath { get; private set; }
public bool InProgress { get; private set; }
/// <summary>
/// In-progress flag for this row. Only settable from inside the class; each assignment raises a
/// property-changed notification for <see cref="InProgress"/>.
/// </summary>
public bool InProgress
{
    get { return _inProgress; }
    private set
    {
        _inProgress = value;
        OnPropertyChanged(nameof(InProgress));
    }
}

/// <summary>
/// Decoy identifier associated with this database. Setting the value stores it and raises a
/// property-changed notification for <see cref="DecoyIdentifier"/>.
/// </summary>
public string DecoyIdentifier
{
    get { return _decoyIdentifier; }
    set
    {
        _decoyIdentifier = value;
        OnPropertyChanged(nameof(DecoyIdentifier));
    }
}

#endregion Public Properties

Expand Down
2 changes: 1 addition & 1 deletion MetaMorpheus/GUI/GUI.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
<PackageReference Include="Microsoft.ML.CpuMath" Version="3.0.1" />
<PackageReference Include="Microsoft.ML.FastTree" Version="3.0.1" />
<PackageReference Include="Microsoft.NETCore.App" Version="2.2.8" />
<PackageReference Include="mzLib" Version="1.0.569" />
<PackageReference Include="mzLib" Version="5.3.1" />
<PackageReference Include="Nett" Version="0.15.0" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="OxyPlot.Core" Version="2.0.0" />
Expand Down
14 changes: 8 additions & 6 deletions MetaMorpheus/GUI/MainWindow.xaml
Original file line number Diff line number Diff line change
Expand Up @@ -435,10 +435,11 @@
<Style TargetType="{x:Type DataGridCell}" BasedOn="{StaticResource DataGridCellStyle}" />
</DataGrid.CellStyle>
<DataGrid.Columns>
<DataGridTextColumn Header="File" Binding="{Binding FileName, Mode=OneWay}" Width="250" />
<DataGridTextColumn Header="File Path" Binding="{Binding FilePath, Mode=OneWay}" Width="200" />
<DataGridTextColumn Header="Contaminant?" Binding="{Binding Contaminant, Mode=OneWay}" Width="250" />
<DataGridCheckBoxColumn Header="Use?" Binding="{Binding Use, Mode=TwoWay}"/>
<DataGridTextColumn Header="File" Binding="{Binding FileName, Mode=OneWay}" Width="366" />
<DataGridTextColumn Header="File Path" Binding="{Binding FilePath, Mode=OneWay}" Width="160" />
<DataGridCheckBoxColumn Header="Contaminant?" Binding="{Binding Contaminant, Mode=TwoWay}" MinWidth="90" />
<DataGridCheckBoxColumn Header="Use?" Binding="{Binding Use, Mode=TwoWay}" MinWidth="80"/>
<DataGridTextColumn Header="Decoy Identifier" Binding="{Binding DecoyIdentifier, Mode=TwoWay}" Width="160"/>
</DataGrid.Columns>
</DataGrid>
</Grid>
Expand Down Expand Up @@ -583,8 +584,9 @@
</DataGrid.CellStyle>
<DataGrid.Columns>
<DataGridTextColumn Header="File" Binding="{Binding FileName, Mode=OneWay}" Width="230" />
<DataGridTextColumn Header="Contaminant?" Binding="{Binding Contaminant, Mode=OneWay}" Width="170" />
<DataGridCheckBoxColumn Header="Use?" Binding="{Binding Use, Mode=TwoWay}"/>
<DataGridCheckBoxColumn Header="Use?" Binding="{Binding Use, Mode=TwoWay}" MinWidth="40"/>
<DataGridCheckBoxColumn Header="Contaminant?" Binding="{Binding Contaminant, Mode=TwoWay}" MinWidth="90"/>
<DataGridTextColumn Header="Decoy Identifier" Binding="{Binding DecoyIdentifier, Mode=TwoWay}" Width="160" />
</DataGrid.Columns>
</DataGrid>
<Button Name="MiniAddProteinDbButton" Grid.Row="0" Width="20" Height="20" HorizontalAlignment="Right" VerticalAlignment="Bottom" Margin="0,0,1,1" Content="+"
Expand Down
11 changes: 7 additions & 4 deletions MetaMorpheus/GUI/MainWindow.xaml.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
using System.Text.RegularExpressions;
using Readers.InternalResults;
using System.Diagnostics;
using EngineLayer.DatabaseLoading;

namespace MetaMorpheusGUI
{
Expand Down Expand Up @@ -668,7 +669,8 @@ private void DatabaseOrSpectraFile_DoubleClick(object sender, MouseButtonEventAr
}

// user is probably just checking or unchecking a checkbox, don't open the file
if (sender is DataGridCell cell && cell.Column is DataGridCheckBoxColumn)
// User is double clicking the decoy ident column.
if (sender is DataGridCell { Column: DataGridCheckBoxColumn } || sender is DataGridCell { Column: DataGridTextColumn, TabIndex: >= 3})
{
return;
}
Expand Down Expand Up @@ -989,7 +991,7 @@ private void RunAllTasks_Click(object sender, RoutedEventArgs e)
// everything is ready to run
EverythingRunnerEngine a = new EverythingRunnerEngine(InProgressTasks.Select(b => (b.DisplayName, b.Task)).ToList(),
SpectraFiles.Where(b => b.Use).Select(b => b.FilePath).ToList(),
ProteinDatabases.Where(b => b.Use).Select(b => new DbForTask(b.FilePath, b.Contaminant)).ToList(),
ProteinDatabases.Where(b => b.Use).Select(b => new DbForTask(b.FilePath, b.Contaminant, b.DecoyIdentifier)).ToList(),
outputFolder);

var t = new Task(a.Run);
Expand Down Expand Up @@ -1091,11 +1093,12 @@ private void BoxWithList_PreviewKeyDown(object sender, KeyEventArgs e)
{
if (!RunTasksButton.IsEnabled) return;


switch (e.Key)
{
// delete selected task/db/spectra
case Key.Delete:
case Key.Back:
case Key.Delete when e.OriginalSource is DataGrid:
case Key.Back when e.OriginalSource is DataGrid:
Delete_Click(sender, e);
e.Handled = true;
break;
Expand Down
1 change: 1 addition & 0 deletions MetaMorpheus/GUI/Util/GuiGlobalParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public class GuiGlobalParams

// User can set a custom proteome directory. Be sure to use double slashes in the path, otherwise it will not be read in properly.
public string UserSpecifiedProteomeDir { get; internal set; } = "";
public string DecoyIdentifier { get; internal set; } = "DECOY";

//Ask about protease-specific parameter recommendations
public bool AskAboutTopDownParams { get; internal set; } = true;
Expand Down
1 change: 1 addition & 0 deletions MetaMorpheus/GUI/Util/UpdateGUISettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public static bool LoadGUISettings()
Toml.WriteFile(Params, Path.Combine(GlobalVariables.DataDir, @"GUIsettings.toml"), MetaMorpheusTask.tomlConfig);
}

GlobalVariables.DecoyIdentifier = Params.DecoyIdentifier ??= "DECOY";
if (GlobalVariables.MetaMorpheusVersion.Contains("Not a release version"))
{
Params.AskAboutUpdating = false;
Expand Down
2 changes: 1 addition & 1 deletion MetaMorpheus/GuiFunctions/GuiFunctions.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
<ItemGroup>
<PackageReference Include="itext7" Version="8.0.5" />
<PackageReference Include="itext7.bouncy-castle-adapter" Version="8.0.5" />
<PackageReference Include="mzLib" Version="1.0.569" />
<PackageReference Include="mzLib" Version="5.3.1" />
<PackageReference Include="OxyPlot.Wpf" Version="2.0.0" />
<PackageReference Include="Svg" Version="3.4.7" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
using System.Windows.Media.Imaging;
using Easy.Common.Extensions;
using EngineLayer;
using EngineLayer.DatabaseLoading;
using MzLibUtil;
using Omics;
using Readers;
Expand Down Expand Up @@ -95,13 +96,10 @@ private async void LoadDatabase()
try
{
// Load biopolymers asynchronously
var bioPolymers = await Task.Run(() =>
new SearchTask().LoadBioPolymers(
"", new() { new DbForTask(DatabasePath, false) }, true, DecoyType.None, new(), new())
.ToDictionary(p => p.Accession, p => p)
);
var dbLoader = new DatabaseLoadingEngine(new(), [], [], [new DbForTask(DatabasePath, false)], "", DecoyType.None);
var loadingResults = await dbLoader.RunAsync();

_allBioPolymers = bioPolymers;
_allBioPolymers = (loadingResults as DatabaseLoadingEngineResults)!.BioPolymers.ToDictionary(bp => bp.Accession, bp => bp);

if (_allBioPolymers.Count == 0)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
using System.Text;
using System.Threading.Tasks;
using EngineLayer;
using EngineLayer.DatabaseLoading;
using FlashLFQ;
using IO.MzML;
using MassSpectrometry;
Expand Down
Loading
Loading