bigbio · ypriverol · May 28, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
diff --git a/.gitignore b/.gitignore
@@ -73,6 +73,7 @@ docs/parity-analysis/*
 !docs/parity-analysis/notes/
 !docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md
 !docs/parity-analysis/notes/2026-05-25-spece-tail-exploration.md
+!docs/parity-analysis/notes/2026-05-28-phase2-peak-rank-parity.md
 !docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md
 !docs/parity-analysis/notes/score-psm-trace-artifacts/
 !docs/parity-analysis/notes/score-psm-trace-artifacts/*

diff --git a/crates/msgf-rust/src/bin/msgf-trace.rs b/crates/msgf-rust/src/bin/msgf-trace.rs
@@ -168,6 +168,11 @@ struct Cli {
     /// existing human-readable stderr trace is unaffected.
     #[arg(long)]
     trace_json: Option<PathBuf>,
+    /// Dump the post-filter, post-deconvolution active peak list (sorted by
+    /// rank ascending) for this scan/charge as `rank<TAB>mz<TAB>intensity`
+    /// lines, preceded by a `DUMP_PEAKS` header. Read-only diagnostic.
+    #[arg(long)]
+    dump_peaks: bool,
 }
 
 fn main() -> ExitCode {
@@ -584,6 +589,27 @@ fn run(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
         println!("    PSM.score (from queue) = {}", top1.score);
     }
 
+    // ---------------------------------------------------------------------
+    // Diagnostic: dump the active (post-filter, post-deconvolution) peak list
+    // sorted by rank ascending. Read-only; uses the SAME peak/rank set the
+    // scorer consumes. Lets us compare Rust's kept-peak ranks against Java's.
+    // ---------------------------------------------------------------------
+    if cli.dump_peaks {
+        let dump_charge = charges_to_try.first().copied().unwrap_or(cli.charge_min);
+        let scored = ScoredSpectrum::new(spec, &scorer, dump_charge);
+        let active = scored.dump_active_peaks();
+        println!(
+            "DUMP_PEAKS scan={} charge={} precursor_mz={} active_peaks={}",
+            cli.scan,
+            dump_charge,
+            spec.precursor_mz,
+            active.len()
+        );
+        for (rank, mz, intensity) in &active {
+            println!("{}\t{:.5}\t{:.2}", rank, mz, intensity);
+        }
+    }
+
     // ---------------------------------------------------------------------
     // Diagnostic: per-node GF ScoreDist dump for a specified peptide.
     // ---------------------------------------------------------------------

diff --git a/crates/scoring/src/scoring/scored_spectrum.rs b/crates/scoring/src/scoring/scored_spectrum.rs
@@ -467,6 +467,25 @@ impl<'a> ScoredSpectrum<'a> {
         self.parent_mass
     }
 
+    /// Diagnostic-only accessor: return the active peak list (post-deconvolution
+    /// when `apply_deconvolution` was applied, else the filtered original) as
+    /// `(rank, mz, intensity)` triples sorted by rank ascending (rank 1 = most
+    /// intense). Filtered-out peaks (rank == `u32::MAX`) are skipped.
+    ///
+    /// Read-only — does not affect scoring. Used by `msgf-trace --dump-peaks`
+    /// to compare Rust's kept-peak/rank assignment against Java's.
+    pub fn dump_active_peaks(&self) -> Vec<(u32, f64, f32)> {
+        let (peaks, ranks) = self.active_peaks_and_ranks();
+        let mut out: Vec<(u32, f64, f32)> = peaks
+            .iter()
+            .zip(ranks.iter())
+            .filter(|(_, &rank)| rank != u32::MAX)
+            .map(|(&(mz, intensity), &rank)| (rank, mz, intensity))
+            .collect();
+        out.sort_by(|a, b| a.0.cmp(&b.0));
+        out
+    }
+
     /// Return a cached `round(prefix_score + suffix_score)` split score when
     /// both nominal masses are in-bounds for this spectrum's FastScorer-style
     /// tables. Returns `None` when the cache is unavailable or either index is

diff --git a/crates/search/src/mass_calibrator.rs b/crates/search/src/mass_calibrator.rs
@@ -150,7 +150,7 @@ pub fn learn_calibration_stats(
         originals,
         &prepared.candidates,
         MIN_DE_NOVO_SCORE,
-        constants::MIN_CONFIDENT_PSMS,
+        constants::RESIDUAL_CAP,
     );
 
     if residuals.len() < constants::MIN_CONFIDENT_PSMS {

diff --git a/crates/search/src/precursor_cal.rs b/crates/search/src/precursor_cal.rs
@@ -76,7 +76,18 @@ pub fn adjusted_observed_neutral_mass(raw_neutral: f64, shift_ppm: f64) -> f64 {
 pub mod constants {
     pub const SAMPLING_STRIDE: usize = 10;
     pub const MAX_SAMPLED: usize = 500;
-    pub const MIN_CONFIDENT_PSMS: usize = 200;
+    /// Upper bound on the number of best-SpecEValue residuals fed to the median
+    /// shift estimate. Java's `MassCalibrator` uses the top 200. Decoupled from
+    /// the firing minimum below so high-yield datasets (Astral/TMT) keep using a
+    /// full 200-residual estimate unchanged.
+    pub const RESIDUAL_CAP: usize = 200;
+    /// Minimum confident residuals required to TRUST the learned shift. Lowered
+    /// from 200 to 150: low-res-MS2 datasets (e.g. PXD001819 ion-trap CID) yield
+    /// fewer sub-1e-6 SpecEValue PSMs from the 500-spectrum sample (~193), which
+    /// previously fell just under 200 and skipped calibration entirely. 150 is
+    /// still a robust sample size for a median, and the cap above keeps the
+    /// high-yield datasets' estimate identical.
+    pub const MIN_CONFIDENT_PSMS: usize = 150;
     pub const MAX_SPEC_EVALUE: f64 = 1e-6;
     pub const MIN_SPECKEYS_FOR_PREPASS: usize = 10_000;
 

diff --git a/docs/parity-analysis/notes/2026-05-28-phase2-peak-rank-parity.md b/docs/parity-analysis/notes/2026-05-28-phase2-peak-rank-parity.md
@@ -0,0 +1,78 @@
+# Phase 2 finding: peak-rank assignment is bit-identical to Java (H2 is NULL)
+
+**Date:** 2026-05-28
+**Branch:** `feat/id-rate-pxd001819-tmt`
+**Dataset:** PXD001819, scan 41522 (a documented I5 label-flip scan)
+**Tooling:** `msgf-trace --dump-peaks` (commit `12c1839c`) — dumps the post-filter,
+post-deconvolution active peak list (rank, m/z, intensity).
+
+## Headline
+
+**Rust and Java assign the identical rank to every peak.** Comparing Rust's
+dumped active peak list against Java's per-ion trace ranks, matched by *observed
+peak m/z* (not theoretical-ion m/z), the rank offset is **+0 for all 465 matched
+peaks**. No exceptions.
+
+This **debunks the I5 doc's central hypothesis** (`2026-05-26-score-psm-trace-findings.md`),
+which attributed ~40% of the scoring divergence to H2 (peak-rank assignment) and
+made it the primary fix target for this whole investigation.
+
+## What went wrong in the original I5 analysis
+
+The I5 `analyze.py` (and an even cruder ad-hoc script) aligned Rust↔Java ions by
+`(ion_kind, round(theo_mz/1e-3))` — i.e. by *theoretical* ion m/z. The same
+theoretical m/z can correspond to **different physical ions** in the two peptides
+being compared (e.g. Rust `y/1` at theo_mz X vs Java `y/2+offset` at a
+coincidentally-equal theo_mz X). Those spurious cross-matches produced the
+RANK_DIFF=301 count and the apparent "+2 rank offset."
+
+When you instead match **actual observed peaks by their m/z** (what
+`--dump-peaks` enables), the ranks are identical. The peak ranker
+(`ScoredSpectrum::new` intensity-desc + m/z-asc, precursor-filtered peaks excluded)
+already matches Java's `Spectrum.setRanksOfPeaks()` (intensity-desc via
+`IntensityComparator`, precursor peaks zeroed-but-ranked-at-bottom) for every peak
+that matters.
+
+Confirmed details:
+- Java zeroes precursor peaks but keeps them in the ranked list (sorted to the
+  bottom at intensity 0); Rust removes them. This makes Java's total peak count
+  ~3 higher (max rank 489 vs Rust active 486) but does **not** shift any real
+  peak's rank, because the zeroed peaks sort below every real peak either way.
+- Rust's precursor-filter tolerance (`pof.tolerance`) already equals Java's `mme`
+  (0.5 Da for CID_LowRes) for these offsets — a test swapping to `param.mme` left
+  the active-peak count unchanged at 486 (reverted as a no-op).
+
+## The label-flip still exists, but is not a rank bug
+
+On current master, scan 41522's Rust top-1 is a decoy (`VVYGNIYEIEIDRLFLTDQR`,
+score 13, SpecE 4.66e-4) — Java picks a real peptide. But:
+- The scores are tiny (top non-decoy is 11), so this is a low-quality spectrum
+  near the noise floor.
+- Since peak ranks are identical, the divergence must be in **candidate
+  enumeration (H1)** — whether Rust enumerates/locates the Java-favored peptide in
+  this scan's mass window — and/or **log-prob table values (H3)**, not rank
+  assignment.
+
+## Implication for the +10% goal
+
+- The single lever the whole Phase 2 plan was built around (fix peak-rank
+  assignment) **does not exist** — Rust already matches Java bit-for-bit there.
+- Combined with the external reviewer's "BSA 217/217 top-1 parity" and Phase 0's
+  finding that instrument/tolerance/calibration are already correct, the picture
+  is: **Rust's scoring is at or near parity with Java.** The residual gap
+  (PXD001819 −1.1% after Phase 1b's +53; TMT −5%) is small, concentrated on
+  low-quality / SpecE-tail spectra, and attributable to H1/H3 micro-differences
+  that the n=9+ audit says regress Percolator when "fixed" individually.
+- **+10% over current Rust is not reachable via scoring-path fixes** — the scoring
+  path is essentially correct. The shippable result from this investigation is
+  Phase 1b (+53 PXD, zero-risk calibration nudge).
+
+## Recommended next directions (if ID-rate work continues)
+
+1. **Stop chasing rank/score parity** — it's already there.
+2. If a large ID gain is still wanted, it likely requires *algorithmic* changes
+   (e.g. a different candidate-generation / scoring model), not parity fixes — a
+   research project, not a bench-gated tweak.
+3. H1 (does Rust enumerate the Java-favored peptide in-window?) is the only
+   remaining parity question worth a *cheap* check, but the per-scan evidence
+   suggests these are low-score spectra that don't cross 1% FDR anyway.