Skip to content

Commit d1280a7

Browse files
committed
fix: improve accuracy of preliminary score estimation
- Remove fragment_min/max_mz parameters - Adjust ppm tolerance during preliminary search to account for charge
1 parent 5379198 commit d1280a7

File tree

10 files changed

+33
-54
lines changed

10 files changed

+33
-54
lines changed

Diff for: CHANGELOG.md

+7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [v0.15.0-alpha] (unreleased)
8+
### Added
9+
- Initial support for searching diaPASEF data
10+
### Changed
11+
- Don't deisotope reporter ion regions if MS2-based TMT/iTRAQ is used
12+
- Removed `fragment_min_mz` and `fragment_max_mz` parameters. These decreased the accuracy of preliminary score estimation when annotating multiply-charged, high-m/z ions.
13+
714
## [v0.14.7]
815
### Added
916
- Added columns missing from parquet output: `semi_enzymatic` and `missed_cleavages`

Diff for: Cargo.lock

+2-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: crates/sage-cli/src/main.rs

+4-11
Original file line numberDiff line numberDiff line change
@@ -184,21 +184,16 @@ impl Runner {
184184
.sn
185185
.then_some(self.parameters.quant.tmt_settings.level);
186186

187-
let (min_fragment_mz, min_deisotope_mz) = match &self.parameters.quant.tmt {
187+
let min_deisotope_mz = match &self.parameters.quant.tmt {
188188
Some(i) => match self.parameters.quant.tmt_settings.level {
189-
2 => (
190-
i.reporter_masses().first().map(|x| x * (1.0 - 20E-6)),
191-
i.reporter_masses().last().map(|x| x * (1.0 + 20E-6)),
192-
),
193-
_ => (None, None),
189+
2 => i.reporter_masses().last().map(|x| x * (1.0 + 20E-6)),
190+
_ => None,
194191
},
195-
None => (None, None),
192+
None => None,
196193
};
197194

198195
let sp = SpectrumProcessor::new(
199196
self.parameters.max_peaks,
200-
min_fragment_mz.unwrap_or(self.parameters.database.fragment_min_mz),
201-
self.parameters.database.fragment_max_mz,
202197
self.parameters.deisotope,
203198
min_deisotope_mz.unwrap_or(0.0),
204199
);
@@ -268,8 +263,6 @@ impl Runner {
268263
min_precursor_charge: self.parameters.precursor_charge.0,
269264
max_precursor_charge: self.parameters.precursor_charge.1,
270265
max_fragment_charge: self.parameters.max_fragment_charge,
271-
min_fragment_mass: self.parameters.database.fragment_min_mz,
272-
max_fragment_mass: self.parameters.database.fragment_max_mz,
273266
chimera: self.parameters.chimera,
274267
report_psms: self.parameters.report_psms,
275268
wide_window: self.parameters.wide_window,

Diff for: crates/sage-cli/tests/integration.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ fn integration() -> anyhow::Result<()> {
1313
let spectra = sage_cloudpath::util::read_mzml("../../tests/LQSRPAAPPAPGPGQLTLR.mzML", 0, None)?;
1414
assert_eq!(spectra.len(), 1);
1515

16-
let sp = SpectrumProcessor::new(100, 0.0, 1500.0, true, 0.0);
16+
let sp = SpectrumProcessor::new(100, true, 0.0);
1717
let processed = sp.process(spectra[0].clone());
1818
assert!(processed.peaks.len() <= 300);
1919

@@ -27,8 +27,6 @@ fn integration() -> anyhow::Result<()> {
2727
min_precursor_charge: 2,
2828
max_precursor_charge: 4,
2929
max_fragment_charge: Some(1),
30-
min_fragment_mass: 0.0,
31-
max_fragment_mass: 1500.0,
3230
chimera: false,
3331
report_psms: 1,
3432
wide_window: false,

Diff for: crates/sage-cloudpath/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "sage-cloudpath"
3-
version = "0.14.7"
3+
version = "0.15.0-alpha"
44
authors = ["Michael Lazear <[email protected]"]
55
edition = "2021"
66
rust-version = "1.62"

Diff for: crates/sage/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "sage-core"
3-
version = "0.14.7"
3+
version = "0.15.0-alpha"
44
authors = ["Michael Lazear <[email protected]"]
55
edition = "2021"
66
rust-version = "1.62"

Diff for: crates/sage/src/database.rs

+11-14
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,6 @@ pub struct Builder {
6363
pub bucket_size: Option<usize>,
6464

6565
pub enzyme: Option<EnzymeBuilder>,
66-
/// Minimum fragment m/z that will be stored in the database
67-
pub fragment_min_mz: Option<f32>,
68-
/// Maximum fragment m/z that will be stored in the database
69-
pub fragment_max_mz: Option<f32>,
7066
/// Minimum peptide monoisotopic mass that will be fragmented
7167
pub peptide_min_mass: Option<f32>,
7268
/// Maximum peptide monoisotopic mass that will be fragmented
@@ -95,8 +91,6 @@ impl Builder {
9591
let bucket_size = self.bucket_size.unwrap_or(8192).next_power_of_two();
9692
Parameters {
9793
bucket_size,
98-
fragment_min_mz: self.fragment_min_mz.unwrap_or(150.0),
99-
fragment_max_mz: self.fragment_max_mz.unwrap_or(2000.0),
10094
peptide_min_mass: self.peptide_min_mass.unwrap_or(500.0),
10195
peptide_max_mass: self.peptide_max_mass.unwrap_or(5000.0),
10296
ion_kinds: self.ion_kinds.unwrap_or(vec![Kind::B, Kind::Y]),
@@ -120,8 +114,6 @@ impl Builder {
120114
pub struct Parameters {
121115
pub bucket_size: usize,
122116
pub enzyme: EnzymeBuilder,
123-
pub fragment_min_mz: f32,
124-
pub fragment_max_mz: f32,
125117
pub peptide_min_mass: f32,
126118
pub peptide_max_mass: f32,
127119
pub ion_kinds: Vec<Kind>,
@@ -236,8 +228,6 @@ impl Parameters {
236228
}
237229
};
238230
ion_idx_filter
239-
&& ion.monoisotopic_mass >= self.fragment_min_mz
240-
&& ion.monoisotopic_mass <= self.fragment_max_mz
241231
})
242232
.map(move |(_, ion)| Theoretical {
243233
peptide_index: PeptideIx(idx as u32),
@@ -426,8 +416,17 @@ pub struct IndexedQuery<'d> {
426416

427417
impl<'d> IndexedQuery<'d> {
428418
/// Search for a specified `fragment_mz` within the database
429-
pub fn page_search(&self, fragment_mz: f32) -> impl Iterator<Item = &Theoretical> {
430-
let (fragment_lo, fragment_hi) = self.fragment_tol.bounds(fragment_mz);
419+
pub fn page_search(&self, fragment_mz: f32, charge: u8) -> impl Iterator<Item = &Theoretical> {
420+
let mass = fragment_mz * charge as f32;
421+
422+
// Account for multiplication of observed decharged mass
423+
// - relative tolerance needs to be proportionally decreased
424+
let tol = match self.fragment_tol {
425+
Tolerance::Ppm(lo, hi) => Tolerance::Ppm(lo / charge as f32, hi / charge as f32),
426+
Tolerance::Da(_, _) => self.fragment_tol,
427+
};
428+
429+
let (fragment_lo, fragment_hi) = tol.bounds(mass);
431430
let (precursor_lo, precursor_hi) = self.precursor_tol.bounds(self.precursor_mass);
432431

433432
// Locate the left and right page indices that contain matching fragments
@@ -587,8 +586,6 @@ mod test {
587586
max_len: Some(10),
588587
..Default::default()
589588
},
590-
fragment_min_mz: 100.0,
591-
fragment_max_mz: 1000.0,
592589
peptide_min_mass: 150.0,
593590
peptide_max_mass: 5000.0,
594591
ion_kinds: vec![Kind::B, Kind::Y],

Diff for: crates/sage/src/scoring.rs

+3-4
Original file line numberDiff line numberDiff line change
@@ -183,8 +183,6 @@ pub struct Scorer<'db> {
183183
pub min_precursor_charge: u8,
184184
pub max_precursor_charge: u8,
185185
pub max_fragment_charge: Option<u8>,
186-
pub min_fragment_mass: f32,
187-
pub max_fragment_mass: f32,
188186
pub chimera: bool,
189187
pub report_psms: usize,
190188

@@ -270,8 +268,7 @@ impl<'db> Scorer<'db> {
270268

271269
for peak in query.peaks.iter() {
272270
for charge in 1..max_fragment_charge {
273-
let mass = peak.mass * charge as f32;
274-
for frag in candidates.page_search(mass) {
271+
for frag in candidates.page_search(peak.mass, charge) {
275272
let idx = frag.peptide_index.0 as usize - candidates.pre_idx_lo;
276273
let sc = &mut hits.preliminary[idx];
277274
if sc.matched == 0 {
@@ -280,6 +277,7 @@ impl<'db> Scorer<'db> {
280277
sc.peptide = frag.peptide_index;
281278
sc.isotope_error = isotope_error;
282279
}
280+
283281
sc.matched += 1;
284282
hits.matched_peaks += 1;
285283
}
@@ -598,6 +596,7 @@ impl<'db> Scorer<'db> {
598596
for charge in 1..max_fragment_charge {
599597
// Experimental peaks are multiplied by charge, therefore theoretical masses are divided
600598
let mz = frag.monoisotopic_mass / charge as f32;
599+
601600
if let Some(peak) = crate::spectrum::select_most_intense_peak(
602601
&query.peaks,
603602
mz,

Diff for: crates/sage/src/spectrum.rs

+2-17
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,6 @@ pub struct Deisotoped {
3939
#[derive(Debug, Clone)]
4040
pub struct SpectrumProcessor {
4141
pub take_top_n: usize,
42-
pub max_fragment_mz: f32,
43-
pub min_fragment_mz: f32,
4442
pub min_deisotope_mz: f32,
4543
pub deisotope: bool,
4644
}
@@ -262,17 +260,9 @@ impl SpectrumProcessor {
262260
/// * `min_deisotope_mz`: m/z threshold for deisotoping (presumably peaks below it are left un-deisotoped, e.g. TMT/iTRAQ reporter ions - confirm against `process`)
264262
/// * `deisotope`: Perform deisotoping & charge state deconvolution
265-
pub fn new(
266-
take_top_n: usize,
267-
min_fragment_mz: f32,
268-
max_fragment_mz: f32,
269-
deisotope: bool,
270-
min_deisotope_mz: f32,
271-
) -> Self {
263+
pub fn new(take_top_n: usize, deisotope: bool, min_deisotope_mz: f32) -> Self {
272264
Self {
273265
take_top_n,
274-
min_fragment_mz,
275-
max_fragment_mz,
276266
min_deisotope_mz,
277267
deisotope,
278268
}
@@ -310,11 +300,7 @@ impl SpectrumProcessor {
310300

311301
peaks
312302
.into_iter()
313-
.filter(|peak| {
314-
peak.envelope.is_none()
315-
&& peak.mz >= self.min_fragment_mz
316-
&& peak.mz <= self.max_fragment_mz
317-
})
303+
.filter(|peak| peak.envelope.is_none())
318304
.map(|peak| {
319305
// Convert from MH* to M
320306
let mass = (peak.mz - PROTON) * peak.charge.unwrap_or(1) as f32;
@@ -330,7 +316,6 @@ impl SpectrumProcessor {
330316
.mz
331317
.iter()
332318
.zip(spectrum.intensity.iter())
333-
.filter(|&(mz, _)| *mz >= self.min_fragment_mz && *mz <= self.max_fragment_mz)
334319
.map(|(mz, &intensity)| {
335320
let mass = (mz - PROTON) * 1.0;
336321
Peak { mass, intensity }

Diff for: crates/sage/tests/integration.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ fn check_all_ions_visited(target_fragment_mz: f32, bucket_size: usize) {
6161
// are returned to us by searching the database.
6262
let query = database.query(1000.0, Tolerance::Da(-5000.0, 5000.0), fragment_tol);
6363

64-
for fragment in query.page_search(target_fragment_mz) {
64+
for fragment in query.page_search(target_fragment_mz, 1) {
6565
visited[fragment.peptide_index.0 as usize] += 1;
6666
}
6767

0 commit comments

Comments
 (0)