diff --git a/.gitignore b/.gitignore
index d546a066..39168561 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,6 +73,7 @@ docs/parity-analysis/*
 !docs/parity-analysis/notes/
 !docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md
 !docs/parity-analysis/notes/2026-05-25-spece-tail-exploration.md
+!docs/parity-analysis/notes/2026-05-28-phase2-peak-rank-parity.md
 !docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md
 !docs/parity-analysis/notes/score-psm-trace-artifacts/
 !docs/parity-analysis/notes/score-psm-trace-artifacts/*
diff --git a/crates/msgf-rust/src/bin/msgf-trace.rs b/crates/msgf-rust/src/bin/msgf-trace.rs
index fadbc5bf..7530ea4b 100644
--- a/crates/msgf-rust/src/bin/msgf-trace.rs
+++ b/crates/msgf-rust/src/bin/msgf-trace.rs
@@ -168,6 +168,11 @@ struct Cli {
     /// existing human-readable stderr trace is unaffected.
     #[arg(long)]
     trace_json: Option<PathBuf>,
+    /// Dump the post-filter, post-deconvolution active peak list (sorted by
+    /// rank ascending) for this scan/charge as `rank<TAB>mz<TAB>intensity`
+    /// lines, preceded by a `DUMP_PEAKS` header. Read-only diagnostic.
+    #[arg(long)]
+    dump_peaks: bool,
 }
 
 fn main() -> ExitCode {
@@ -584,6 +589,27 @@ fn run(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
         println!("    PSM.score (from queue) = {}", top1.score);
     }
 
+    // ---------------------------------------------------------------------
+    // Diagnostic: dump the active (post-filter, post-deconvolution) peak list
+    // sorted by rank ascending. Read-only; uses the SAME peak/rank set the
+    // scorer consumes. Lets us compare Rust's kept-peak ranks against Java's.
+    // ---------------------------------------------------------------------
+    if cli.dump_peaks {
+        let dump_charge = charges_to_try.first().copied().unwrap_or(cli.charge_min);
+        let scored = ScoredSpectrum::new(spec, &scorer, dump_charge);
+        let active = scored.dump_active_peaks();
+        println!(
+            "DUMP_PEAKS scan={} charge={} precursor_mz={} active_peaks={}",
+            cli.scan,
+            dump_charge,
+            spec.precursor_mz,
+            active.len()
+        );
+        for (rank, mz, intensity) in &active {
+            println!("{}\t{:.5}\t{:.2}", rank, mz, intensity);
+        }
+    }
+
     // ---------------------------------------------------------------------
     // Diagnostic: per-node GF ScoreDist dump for a specified peptide.
     // ---------------------------------------------------------------------
diff --git a/crates/scoring/src/scoring/scored_spectrum.rs b/crates/scoring/src/scoring/scored_spectrum.rs
index ca7ffc49..9d7ade33 100644
--- a/crates/scoring/src/scoring/scored_spectrum.rs
+++ b/crates/scoring/src/scoring/scored_spectrum.rs
@@ -467,6 +467,25 @@ impl<'a> ScoredSpectrum<'a> {
         self.parent_mass
     }
 
+    /// Diagnostic-only accessor: return the active peak list (post-deconvolution
+    /// when `apply_deconvolution` was applied, else the filtered original) as
+    /// `(rank, mz, intensity)` triples sorted by rank ascending (rank 1 = most
+    /// intense). Filtered-out peaks (rank == `u32::MAX`) are skipped.
+    ///
+    /// Read-only — does not affect scoring. Used by `msgf-trace --dump-peaks`
+    /// to compare Rust's kept-peak/rank assignment against Java's.
+    pub fn dump_active_peaks(&self) -> Vec<(u32, f64, f32)> {
+        let (peaks, ranks) = self.active_peaks_and_ranks();
+        let mut out: Vec<(u32, f64, f32)> = peaks
+            .iter()
+            .zip(ranks.iter())
+            .filter(|(_, &rank)| rank != u32::MAX)
+            .map(|(&(mz, intensity), &rank)| (rank, mz, intensity))
+            .collect();
+        out.sort_by(|a, b| a.0.cmp(&b.0));
+        out
+    }
+
     /// Return a cached `round(prefix_score + suffix_score)` split score when
     /// both nominal masses are in-bounds for this spectrum's FastScorer-style
     /// tables. Returns `None` when the cache is unavailable or either index is
diff --git a/crates/search/src/mass_calibrator.rs b/crates/search/src/mass_calibrator.rs
index a227cf73..ca55246f 100644
--- a/crates/search/src/mass_calibrator.rs
+++ b/crates/search/src/mass_calibrator.rs
@@ -150,7 +150,7 @@ pub fn learn_calibration_stats(
         originals,
         &prepared.candidates,
         MIN_DE_NOVO_SCORE,
-        constants::MIN_CONFIDENT_PSMS,
+        constants::RESIDUAL_CAP,
     );
 
     if residuals.len() < constants::MIN_CONFIDENT_PSMS {
diff --git a/crates/search/src/precursor_cal.rs b/crates/search/src/precursor_cal.rs
index 755e7659..3b82d154 100644
--- a/crates/search/src/precursor_cal.rs
+++ b/crates/search/src/precursor_cal.rs
@@ -76,7 +76,18 @@ pub fn adjusted_observed_neutral_mass(raw_neutral: f64, shift_ppm: f64) -> f64 {
 pub mod constants {
     pub const SAMPLING_STRIDE: usize = 10;
     pub const MAX_SAMPLED: usize = 500;
-    pub const MIN_CONFIDENT_PSMS: usize = 200;
+    /// Upper bound on the number of best-SpecEValue residuals fed to the median
+    /// shift estimate. Java's `MassCalibrator` uses the top 200. Decoupled from
+    /// the firing minimum below so high-yield datasets (Astral/TMT) keep using a
+    /// full 200-residual estimate unchanged.
+    pub const RESIDUAL_CAP: usize = 200;
+    /// Minimum confident residuals required to TRUST the learned shift. Lowered
+    /// from 200 to 150: low-res-MS2 datasets (e.g. PXD001819 ion-trap CID) yield
+    /// fewer sub-1e-6 SpecEValue PSMs from the 500-spectrum sample (~193), which
+    /// previously fell just under 200 and skipped calibration entirely. 150 is
+    /// still a robust sample size for a median, and the cap above keeps the
+    /// high-yield datasets' estimate identical.
+    pub const MIN_CONFIDENT_PSMS: usize = 150;
     pub const MAX_SPEC_EVALUE: f64 = 1e-6;
     pub const MIN_SPECKEYS_FOR_PREPASS: usize = 10_000;
 
diff --git a/docs/parity-analysis/notes/2026-05-28-phase2-peak-rank-parity.md b/docs/parity-analysis/notes/2026-05-28-phase2-peak-rank-parity.md
new file mode 100644
index 00000000..985f7133
--- /dev/null
+++ b/docs/parity-analysis/notes/2026-05-28-phase2-peak-rank-parity.md
@@ -0,0 +1,78 @@
+# Phase 2 finding: peak-rank assignment is bit-identical to Java (H2 is NULL)
+
+**Date:** 2026-05-28
+**Branch:** `feat/id-rate-pxd001819-tmt`
+**Dataset:** PXD001819, scan 41522 (a documented I5 label-flip scan)
+**Tooling:** `msgf-trace --dump-peaks` (commit `12c1839c`) — dumps the post-filter,
+post-deconvolution active peak list (rank, m/z, intensity).
+
+## Headline
+
+**Rust and Java assign the identical rank to every peak.** Comparing Rust's
+dumped active peak list against Java's per-ion trace ranks, matched by *observed
+peak m/z* (not theoretical-ion m/z), the rank offset is **+0 for all 465 matched
+peaks**. No exceptions.
+
+This **debunks the I5 doc's central hypothesis** (`2026-05-26-score-psm-trace-findings.md`),
+which attributed ~40% of the scoring divergence to H2 (peak-rank assignment) and
+made it the primary fix target for this whole investigation.
+
+## What went wrong in the original I5 analysis
+
+The I5 `analyze.py` (and an even cruder ad-hoc script) aligned Rust↔Java ions by
+`(ion_kind, round(theo_mz/1e-3))` — i.e. by *theoretical* ion m/z. The same
+theoretical m/z can correspond to **different physical ions** in the two peptides
+being compared (e.g. Rust `y/1` at theo_mz X vs Java `y/2+offset` at a
+coincidentally-equal theo_mz X). Those spurious cross-matches produced the
+RANK_DIFF=301 count and the apparent "+2 rank offset."
+
+When you instead match **actual observed peaks by their m/z** (what
+`--dump-peaks` enables), the ranks are identical. The peak ranker
+(`ScoredSpectrum::new` intensity-desc + m/z-asc, precursor-filtered peaks excluded)
+already matches Java's `Spectrum.setRanksOfPeaks()` (intensity-desc via
+`IntensityComparator`, precursor peaks zeroed-but-ranked-at-bottom) for every peak
+that matters.
+
+Confirmed details:
+- Java zeroes precursor peaks but keeps them in the ranked list (sorted to the
+  bottom at intensity 0); Rust removes them. This makes Java's total peak count
+  ~3 higher (max rank 489 vs Rust active 486) but does **not** shift any real
+  peak's rank, because the zeroed peaks sort below every real peak either way.
+- Rust's precursor-filter tolerance (`pof.tolerance`) already equals Java's `mme`
+  (0.5 Da for CID_LowRes) for these offsets — a test swapping to `param.mme` left
+  the active-peak count unchanged at 486 (reverted as a no-op).
+
+## The label-flip still exists, but is not a rank bug
+
+On current master, scan 41522's Rust top-1 is a decoy (`VVYGNIYEIEIDRLFLTDQR`,
+score 13, SpecE 4.66e-4) — Java picks a real peptide. But:
+- The scores are tiny (top non-decoy is 11), so this is a low-quality spectrum
+  near the noise floor.
+- Since peak ranks are identical, the divergence must be in **candidate
+  enumeration (H1)** — whether Rust enumerates/locates the Java-favored peptide in
+  this scan's mass window — and/or **log-prob table values (H3)**, not rank
+  assignment.
+
+## Implication for the +10% goal
+
+- The single lever the whole Phase 2 plan was built around (fix peak-rank
+  assignment) **does not exist** — Rust already matches Java bit-for-bit there.
+- Combined with the external reviewer's "BSA 217/217 top-1 parity" and Phase 0's
+  finding that instrument/tolerance/calibration are already correct, the picture
+  is: **Rust's scoring is at or near parity with Java.** The residual gap
+  (PXD001819 −1.1% after Phase 1b's +53; TMT −5%) is small, concentrated on
+  low-quality / SpecE-tail spectra, and attributable to H1/H3 micro-differences
+  that the n=9+ audit says regress Percolator when "fixed" individually.
+- **+10% over current Rust is not reachable via scoring-path fixes** — the scoring
+  path is essentially correct. The shippable result from this investigation is
+  Phase 1b (+53 PXD, zero-risk calibration nudge).
+
+## Recommended next directions (if ID-rate work continues)
+
+1. **Stop chasing rank/score parity** — it's already there.
+2. If a large ID gain is still wanted, it likely requires *algorithmic* changes
+   (e.g. a different candidate-generation / scoring model), not parity fixes — a
+   research project, not a bench-gated tweak.
+3. H1 (does Rust enumerate the Java-favored peptide in-window?) is the only
+   remaining parity question worth a *cheap* check, but the per-scan evidence
+   suggests these are low-score spectra that don't cross 1% FDR anyway.
diff --git a/docs/superpowers/plans/2026-05-28-id-rate-pxd001819-tmt-plan.md b/docs/superpowers/plans/2026-05-28-id-rate-pxd001819-tmt-plan.md
new file mode 100644
index 00000000..a405f633
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-28-id-rate-pxd001819-tmt-plan.md
@@ -0,0 +1,385 @@
+# PXD001819 + TMT ID-rate Improvement Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+>
+> **This is a bench-gated investigation, not a deterministic build.** The ultimate
+> pass/fail gate for each change is the Percolator 1%-FDR PSM count on the VM, not a
+> unit test. Phase 0 is a hard prerequisite: its measurements decide whether Phase 1
+> has any yield. Phase 2's exact code edit is determined by a trace whose result is
+> not known in advance — that task is written as an investigation protocol with a
+> concrete fix location, candidate rules, and verification, NOT a pre-written diff.
+
+**Goal:** Lift PXD001819 (14,755) and TMT (9,605) PSM counts @1% FDR toward +10% over current Rust, without regressing wall >3% or regressing Astral (36,715).
+
+**Architecture:** One feature branch (`feat/id-rate-pxd001819-tmt`), one commit per change, each bench-gated on all three datasets via the existing VM harness, with in-place revert when the gate fails. Highest-leverage / lowest-risk first.
+
+**Tech Stack:** Rust (workspace crates: `scoring`, `search`, `input`, `output`, `msgf-rust`); bench VM `pride-linux-vm` via SSH control socket `/tmp/msgfplus-bench.sock`; Percolator 3.7.1 in Docker; Python stdlib for the I5 trace harness.
+
+---
+
+## Conventions used throughout
+
+**Build (local):** `cargo build --release -p msgf-rust` (the committed `.cargo/config.toml` sets `target-cpu=sandybridge`).
+
+**Bit-identical regression gate (local):**
+```bash
+cargo test --release -p msgf-rust precursor_cal_off_pin_tsv_match_golden_after_sort
+```
+Expected: `test result: ok. 1 passed`. (Phase 2 changes top-1 selection, so this gate will legitimately change — see Task 2a Step 6 for how to regenerate goldens. Phases 0/1-diagnostic/3 must keep it green.)
+
+**Workspace tests (local), CI skip list:**
+```bash
+cargo test --release --workspace -- \
+  --skip charge_missing_spectrum_uses_per_charge_scored_spec \
+  --skip spectrum_without_charge_tries_charge_range \
+  --skip known_peptide_appears_in_top_n \
+  --skip read_bsa_canno_text_format \
+  --skip read_tryp_pig_bov_revcat_csarr_cnlcp \
+  --skip tryp_pig_bov_revcat_full_set_loads \
+  --skip match_spectra_output_invariant_across_thread_counts
+```
+
+**Clippy gate (local):** `cargo clippy --workspace --all-targets -- -D warnings` → clean.
+
+**VM bench (per change):** ship source, build, run 3 datasets cal=auto, run Percolator. The canonical baseline numbers to beat/hold:
+
+| Dataset | Rust @1% FDR (baseline) | Wall (baseline) |
+|---|---:|---:|
+| PXD001819 | 14,755 | ~0:54 |
+| TMT | 9,605 | ~2:33 |
+| Astral | 36,715 | ~6:28 |
+
+**Bench ship gate (per change):** gains PXD or TMT @1% FDR; regresses none of PXD/TMT/Astral beyond ~±0.3% (≈ ±45 PXD, ±29 TMT, ±110 Astral); wall within ~3% on all three. Otherwise `git revert` the commit in place.
+
+**VM socket precondition:** every VM step needs `ssh -S /tmp/msgfplus-bench.sock -O check root@pride-linux-vm` to succeed. If it fails, STOP and ask the human to run `ssh -M -S /tmp/msgfplus-bench.sock -fN root@pride-linux-vm`.
+
+---
+
+## Phase 0 — Diagnostic (prerequisite; no code change)
+
+### Task 0: Measure what PXD001819 and TMT actually resolve to
+
+**Files:** none (measurement only). Produces `docs/parity-analysis/notes/2026-05-28-id-rate-phase0-diagnostic.md` (gitignored notes dir; local record).
+
+- [ ] **Step 1: Verify the VM socket is alive**
+
+Run:
+```bash
+ssh -S /tmp/msgfplus-bench.sock -O check root@pride-linux-vm
+```
+Expected: `Master running (pid=NNNN)`. If not, STOP and request the human re-establish it.
+
+- [ ] **Step 2: Capture each Rust run's resolver + calibration stderr**
+
+The previous bench logs are at `/srv/data/msgf-bench/bench-v2024-results/{pxd001819,tmt,astral}-rust-auto.log`. Extract the resolver + calibration lines:
+```bash
+ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm \
+  "for d in pxd001819 tmt astral; do echo \"== \$d ==\"; grep -iE 'Param resolver|instrument =|precursor.?cal|MassCalibrator|calibrat|confident|high.res|tolerance' /srv/data/msgf-bench/bench-v2024-results/\${d}-rust-auto.log; done"
+```
+Expected: lines like `Param resolver: auto-detected dominant activation method = HCD (instrument = ...)`. Record the resolved instrument string per dataset.
+
+- [ ] **Step 3: Read the true instrument analyzer from each mzML**
+
+```bash
+ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm \
+  "for f in /srv/data/msgf-bench/data/UPS1_5000amol_R1.mzML /srv/data/msgf-bench/tmt-data/a05058.mzML /srv/data/msgf-bench/astral-data/LFQ_Astral_DDA_15min_50ng_Condition_A_REP1.mzML; do echo \"== \$f ==\"; grep -aoE 'MS:100(0079|0083|0084|0264|0484|0624|1906)|FTMS|ITMS|orbitrap|Q Exactive|Astral|ion trap' \$f | sort | uniq -c | head; done"
+```
+Map cvParams: `MS:1000484` orbitrap, `MS:1000079` FTICR, `MS:1000264` ion trap, `MS:1000083` radial ejection ion trap. This tells the *true* MS2 analyzer.
+
+- [ ] **Step 4: Determine the tolerance branch actually taken**
+
+For each dataset, the scoring/feature tolerance is high-res (20 ppm) iff the resolved `InstrumentType::is_high_resolution()` is true. Find which `InstrumentType` enum value `is_high_resolution()` returns true for:
+```bash
+grep -n -A8 "fn is_high_resolution" crates/model/src/*.rs crates/scoring/src/*.rs 2>/dev/null
+```
+Cross-reference the Step-2 resolved instrument against this. Record per dataset: `tolerance = 20ppm | 0.5Da`.
+
+- [ ] **Step 5: Write the diagnostic table**
+
+Create `docs/parity-analysis/notes/2026-05-28-id-rate-phase0-diagnostic.md` with the filled table:
+```markdown
+| Dataset | True MS2 analyzer | Resolved instrument | is_high_resolution | Feature/scoring tol | Calibration fired? |
+|---|---|---|---|---|---|
+| PXD001819 | ... | ... | ... | ... | ... |
+| TMT | ... | ... | ... | ... | ... |
+| Astral | (orbitrap/Astral) | (high-res) | true | 20ppm | yes (reference) |
+```
+
+- [ ] **Step 6: Decide Phase 1 scope (record in the same note)**
+
+Decision rules:
+- If PXD or TMT shows `is_high_resolution = false` / `tol = 0.5Da` but the true analyzer is orbitrap/FT → **Phase 1a is live** (iter20-style win available).
+- If calibration shows "skipped" on PXD or TMT → **Phase 1b is live**.
+- If both are already high-res + calibrated → skip Phase 1, go to Phase 2.
+
+Write the decision explicitly. No commit (notes dir is gitignored); this task gates the rest.
+
+---
+
+## Phase 1 — Config levers (only the sub-tasks Phase 0 marked "live")
+
+### Task 1a: Instrument-resolution / tolerance fix (CONDITIONAL on Phase 0 Step 6)
+
+**Files:**
+- Modify: `crates/msgf-rust/src/bin/msgf-rust.rs` (instrument resolution at ~L585–631) — exact edit depends on the Phase-0 root cause (see Step 2).
+- Possibly: `crates/model/src/<instrument module>.rs` (`is_high_resolution` mapping) or `crates/search/src/match_engine.rs::compute_psm_features` (tolerance branch).
+
+- [ ] **Step 1: Reproduce the mis-resolution locally**
+
+If PXD001819's mzML is available locally, run; otherwise reason from the Phase-0 evidence. Confirm the code path: does `detect_instrument_type` return the orbitrap type, and does that type's `is_high_resolution()` return true? If detection returns `None` and the code falls back to low-res (`crates/msgf-rust/src/bin/msgf-rust.rs:605` comment "None ⇒ resolver picks LowRes"), the bug is in detection, not the tolerance branch.
+
+- [ ] **Step 2: Make the targeted fix**
+
+Two likely shapes (pick per Step 1 evidence):
+- **Detection gap:** `detect_instrument_type` fails to recognize the analyzer cvParam present in the mzML. Add the missing cvParam mapping in `crates/input/src/mzml.rs` (the `CV_ANALYZER_*` constants near L20–40 and the match at ~L722–735).
+- **Tolerance-branch gap:** detection is correct but `compute_psm_features` / scoring tolerance still uses 0.5 Da. Route the resolved high-res `InstrumentType` into the `is_high_resolution()` check.
+
+Show the diff in the commit; do not guess it here.
+
+- [ ] **Step 3: Local gates**
+
+Run the bit-identical gate, workspace tests, clippy (commands in Conventions). The bit-identical golden is cal=off low-res, so a high-res-detection fix should NOT change it (different code path) — expected still green. If it changes, investigate before proceeding.
+
+- [ ] **Step 4: Commit**
+```bash
+git add -A
+git commit -m "fix(resolve): detect high-res instrument so 20ppm tolerance engages on PXD/TMT"
+```
+
+- [ ] **Step 5: VM bench + gate**
+
+Ship, build, bench all 3 datasets (use the bench recipe in Task 2a Step 5). Apply the bench ship gate. If it regresses, `git revert HEAD` and record the result in the Phase-0 note.
+
+### Task 1b: Calibration engagement (CONDITIONAL on Phase 0 Step 6)
+
+**Files:**
+- Modify: the MassCalibrator confident-PSM guard (find it):
+```bash
+grep -rn "confident\|min.*confident\|200\|calibrat" crates/scoring/src crates/search/src --include=*.rs | grep -i cal | head
+```
+
+- [ ] **Step 1: Locate the skip guard and log the actual count**
+
+Find where calibration decides to skip (the "<200 confident PSMs" guard from memory). Confirm via the Phase-0 stderr what count PXD/TMT hit.
+
+- [ ] **Step 2: Decide the change**
+
+If the guard is skipping with, say, 150 confident PSMs, evaluate lowering the threshold OR using a wider confident-PSM definition. This is a parameter change to a single constant — show it in the commit.
+
+- [ ] **Step 3: Local gates + commit**
+```bash
+cargo test --release -p msgf-rust precursor_cal_off_pin_tsv_match_golden_after_sort
+git add -A && git commit -m "tune(cal): lower confident-PSM guard so calibration engages on PXD/TMT"
+```
+
+- [ ] **Step 4: VM bench + gate** (same recipe as Task 2a Step 5; revert if regresses).
+
+---
+
+## Phase 2 — Label-flip gap (hot-path; investigation + bench gate)
+
+### Task 2a: H2 peak-rank assignment — match Java's `getPeakByMass` rule
+
+**Files:**
+- Modify: `crates/scoring/src/scoring/scored_spectrum.rs` — `nearest_peak_rank_in` (~L897–918) and/or the rank-assignment sort in `ScoredSpectrum::new` (~L199–237).
+- Test: `crates/scoring/src/scoring/scored_spectrum.rs` (inline `#[cfg(test)]` module).
+
+- [ ] **Step 1: Verify the VM socket; rebuild msgf-trace if needed**
+
+```bash
+ssh -S /tmp/msgfplus-bench.sock -O check root@pride-linux-vm
+```
+The I5 artifacts are committed at `docs/parity-analysis/notes/score-psm-trace-artifacts/`. The 5 traced scans: 41522, 34685, 23272, 23082, 16629.
+
+- [ ] **Step 2: Identify one concrete RANK_DIFF ion**
+
+From the committed artifacts, find the first RANK_DIFF ion on scan 41522:
+```bash
+cd docs/parity-analysis/notes/score-psm-trace-artifacts
+python3 analyze.py 2>/dev/null | grep -i rank_diff | head
+# Then inspect the specific ion in rust-trace-scan-41522.json:
+python3 -c "import json; d=json.load(open('rust-trace-scan-41522.json')); [print(p['peptide'], [i for i in p['ions'] if 'rank' in i][:3]) for p in d]" 2>/dev/null | head
+```
+Record: `(theo_mz, rust_rank, java_rank)` for the first divergent ion. Decompress the Java side if needed: `gunzip -k java-trace-scan-41522.log.gz`.
+
+- [ ] **Step 3: Read Java's `getPeakByMass` rule**
+
+The Java reference is on the bench VM clone `/srv/data/msgf-bench/java-legacy-trace/`. Find the peak-selection rule:
+```bash
+ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm \
+  "grep -rn -A20 'getPeakByMass' /srv/data/msgf-bench/java-legacy-trace/src/main/java/ | head -40"
+```
+Determine Java's tie-break when multiple peaks fall in tolerance: first-in-list? closest-m/z? highest-intensity? Rust currently uses **highest-intensity, strict `>` (first wins on tie)**. Record the exact divergence.
+
+- [ ] **Step 4: Write a failing unit test that encodes Java's rule**
+
+In `scored_spectrum.rs` tests, construct a tiny spectrum with two peaks inside one tolerance window where Java's rule and Rust's current rule disagree (use the real (theo_mz, ranks) from Step 2 if it reproduces, else a synthetic case matching the rule found in Step 3). Assert `nearest_peak_rank_in` returns Java's choice.
+```rust
+#[test]
+fn nearest_peak_rank_matches_java_getpeakbymass_tiebreak() {
+    // two peaks within tol of target; Java picks <rule from Step 3>
+    // assert the rank returned equals Java's pick
+}
+```
+Run: `cargo test --release -p scoring nearest_peak_rank_matches_java -- --nocapture` → Expected: FAIL.
+
+- [ ] **Step 5: Implement the rule change; verify the unit test passes**
+
+Change the selection in `nearest_peak_rank_in` (and the rank-assignment sort if the divergence is there) to match Java. Run the unit test → Expected: PASS. Then run the full `scoring` crate tests.
+
+- [ ] **Step 6: Regenerate the bit-identical golden (top-1 legitimately changes)**
+
+This change alters top-1 selection, so `precursor_cal_off.pin/.tsv` goldens shift. Regenerate them per the repo's golden-update procedure:
+```bash
+grep -rn "precursor_cal_off_pin_tsv_match_golden" crates/msgf-rust/tests/ | head
+# Follow the test's documented regeneration path (typically an env var or a
+# regen helper); inspect the test to confirm before regenerating.
+```
+Commit the regenerated goldens together with the code change so the gate stays meaningful.
+
+- [ ] **Step 7: Commit**
+```bash
+git add -A
+git commit -m "fix(scoring): match Java getPeakByMass tie-break in peak-rank assignment (H2)"
+```
+
+- [ ] **Step 8: Re-run the I5 trace harness on the VM; confirm RANK_DIFF drops**
+
+Rebuild msgf-trace on the VM, re-run the 5-scan trace, re-run `analyze.py`. Expected: RANK_DIFF count drops from 301; LOGPROB_DIFF drops proportionally. If RANK_DIFF does NOT drop, the rule change was wrong — revisit Step 3.
+
+- [ ] **Step 9: VM bench + gate (the decisive test)**
+
+Bench recipe:
+```bash
+# ship source (rsync/scp changed crates), then on VM:
+ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm \
+  "cd /srv/data/msgf-bench/iter5-build && PATH=\$HOME/.cargo/bin:\$PATH cargo build --release -p msgf-rust"
+# run 3 datasets cal=auto (reuse run_bench_*.sh pattern), then percolator:
+ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm \
+  "for l in pxd001819-rust-auto astral-rust-auto tmt-rust-auto; do bash /srv/data/msgf-bench/run_percolator_docker.sh /srv/data/msgf-bench/bench-<TAG>-results/\$l.pin /srv/data/msgf-bench/bench-<TAG>-percolator \$l; done"
+```
+Apply the bench ship gate. **If Percolator regresses on any dataset beyond noise, `git revert HEAD` (and the goldens commit) — this is the n=9+ audit risk materializing.** Record the result.
+
+### Task 2b: H3 log-prob indexing (CONDITIONAL — only if 2a passed its gate)
+
+**Files:**
+- Modify: `crates/scoring/src/scoring/rank_scorer.rs` (table indexing / clamping).
+- Test: inline `#[cfg(test)]` in `rank_scorer.rs`.
+
+- [ ] **Step 1: Re-run analyze.py; isolate a pure-H3 ion**
+
+After 2a, find an ion with LOGPROB_DIFF but NO RANK_DIFF (same rank, different value) — there were 307 such cases pre-fix. Record `(partition, ion_type, rank, rust_logprob, java_logprob)`.
+
+- [ ] **Step 2: Compare Rust's table lookup to Java's for that ion**
+
+Trace `rank_scorer.rs` indexing (`idx = min(rank, max_rank).max(1) - 1`; missing uses `max_rank` slot) against Java's `getNodeScore` table lookup for the same (partition, ion, rank). Identify the off-by-one / clamping / table-content difference.
+
+- [ ] **Step 3: Failing unit test encoding Java's lookup**
+
+Construct a `RankScorer` from a known partition table and assert the log-prob at a specific rank matches Java's value for the recorded ion. Run → Expected FAIL.
+
+- [ ] **Step 4: Fix indexing; unit test passes; regenerate goldens; commit**
+```bash
+cargo test --release -p scoring <test_name>
+git add -A && git commit -m "fix(scoring): align per-rank log-prob table indexing with Java (H3)"
+```
+
+- [ ] **Step 5: VM bench + gate** (same recipe as 2a Step 9; revert if regresses).
+
+---
+
+## Phase 3 — Additive PIN features (safety net; only if Phases 1–2 < +10%)
+
+### Task 3a: Add `MeanMatchedRank` PIN column
+
+**Files:**
+- Modify: `crates/search/src/psm.rs` (`PsmFeatures` — add field).
+- Modify: `crates/search/src/match_engine.rs::compute_psm_features` (compute it).
+- Modify: `crates/output/src/pin.rs` (`write_header` + `write_psm_row` — emit it, inserted before `Peptide` next to `EdgeScore`).
+- Test: `crates/output/src/pin.rs` tests + `crates/output/tests/output_pin_schema_parity.rs`.
+
+- [ ] **Step 1: Failing test for the new column in the header**
+
+In `crates/output/tests/output_pin_schema_parity.rs`, add an assertion that the header contains `MeanMatchedRank` at the expected position. Run → Expected FAIL.
+
+- [ ] **Step 2: Add the field to `PsmFeatures`**
+```rust
+// crates/search/src/psm.rs, in PsmFeatures
+pub mean_matched_rank: f32,
+```
+
+- [ ] **Step 3: Compute it in `compute_psm_features`**
+
+After the matched-ion loop, average `nearest_peak_rank` over matched b/y ions (0.0 if none matched):
+```rust
+// crates/search/src/match_engine.rs, in compute_psm_features
+let mean_matched_rank = if matched_rank_count > 0 {
+    matched_rank_sum as f32 / matched_rank_count as f32
+} else { 0.0 };
+// ...set features.mean_matched_rank = mean_matched_rank;
+```
+(Accumulate `matched_rank_sum`/`matched_rank_count` inside the existing matched-ion loop where `nearest_peak_rank` is already called.)
+
+- [ ] **Step 4: Emit the column**
+
+In `crates/output/src/pin.rs::write_header`, insert `MeanMatchedRank` immediately before `EdgeScore`; in `write_psm_row`, write `features.mean_matched_rank` in the same position. Keep `EdgeScore` and `Peptide`/`Proteins` last.
+
+- [ ] **Step 5: Update schema-parity test + run**
+```bash
+cargo test --release -p output
+cargo test --release --workspace -- <CI skip list>
+```
+Expected: PASS (column count +1 everywhere it's asserted).
+
+- [ ] **Step 6: Commit**
+```bash
+git add -A && git commit -m "feat(pin): add additive MeanMatchedRank feature column"
+```
+
+- [ ] **Step 7: VM bench + gate**
+
+Additive columns never change top-1, so PXD/TMT/Astral PSM *search* output is byte-identical pre-Percolator except the new column; the only delta is whether Percolator extracts signal from it. Bench all 3; keep if it gains, revert if it regresses (it should at worst be flat).
+
+### Task 3b: Add `ScoreFractionTop1Split` PIN column (only if 3a flat and still < target)
+
+**Files:**
+- Modify: `crates/scoring/src/scoring/psm_score.rs::score_psm` — return the max single-split contribution alongside the sum (change return type to `(f32, f32)` or add a sibling fn to avoid disturbing callers).
+- Modify: `crates/search/src/psm.rs`, `match_engine.rs`, `crates/output/src/pin.rs` as in 3a.
+
+- [ ] **Step 1: Failing header test** (as 3a Step 1, for `ScoreFractionTop1Split`).
+- [ ] **Step 2: Track max split in `score_psm`**
+
+Inside the split loop that accumulates `total += contribution`, also track `max_contrib = max_contrib.max(contribution)`. Expose `max_contrib / total` (guard total==0 → 0.0). To avoid perturbing the hot `score_psm` signature used in ranking, add a separate `score_psm_with_split_max(...) -> (f32, f32)` used only in `compute_psm_features`, OR compute the fraction in `compute_psm_features` by re-deriving splits (prefer the former to avoid double work).
+
+- [ ] **Step 3–6:** wire field → compute → emit → schema test → bench gate, exactly as 3a Steps 2–7.
+
+---
+
+## Final: cumulative bench + branch close-out
+
+### Task 4: Cumulative bench, PR, ship/revert reconciliation
+
+- [ ] **Step 1: Full 3-dataset bench at branch HEAD**
+
+Confirm the cumulative PXD/TMT/Astral @1% FDR and walls vs baseline. Build the result table.
+
+- [ ] **Step 2: Reconcile against the stretch target**
+
+Record cumulative deltas. If +10% reached, note it. If not, the shipped subset is whatever net-gained under the hard gate (the +10% is a direction, not a revert-all gate per the spec).
+
+- [ ] **Step 3: Open the PR**
+```bash
+git push -u origin feat/id-rate-pxd001819-tmt
+gh pr create --base dev --title "perf(id-rate): close PXD001819 + TMT label-flip gap vs Java" --body "<results table + per-phase outcomes + reverts>"
+```
+
+- [ ] **Step 4: Watch CI to green** (Lint + 3 OS tests + CodeRabbit), fix any failures.
+
+---
+
+## Self-review notes
+
+- **Spec coverage:** Phase 0 → Task 0; Phase 1a → Task 1a; Phase 1b → Task 1b; Phase 2 H2 → Task 2a; Phase 2 H3 → Task 2b; Phase 3 → Tasks 3a/3b; success-criteria reconciliation → Task 4. All spec sections covered.
+- **Conditionality is intentional:** Tasks 1a/1b/2b/3a/3b are explicitly gated on prior-task outcomes — this matches the spec's "outcome of each phase decides the next." An executor must honor the CONDITIONAL markers, not run every task blindly.
+- **Phase 2 code is deliberately not pre-written:** the exact tie-break/indexing edit is unknowable until Steps 2–3 of each task read the real trace + Java source. The tasks pin the file:line, the candidate rules, the unit-test shape, and the bench gate — which is the maximum honest specificity here.
+- **Golden regeneration** is called out explicitly in 2a/2b because those tasks change top-1 selection and will move the committed PIN/TSV goldens.
diff --git a/docs/superpowers/specs/2026-05-28-id-rate-pxd001819-tmt-design.md b/docs/superpowers/specs/2026-05-28-id-rate-pxd001819-tmt-design.md
new file mode 100644
index 00000000..4b6cc4d3
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-28-id-rate-pxd001819-tmt-design.md
@@ -0,0 +1,179 @@
+# ID-rate improvement for PXD001819 + TMT — design
+
+**Date:** 2026-05-28
+**Branch:** `feat/id-rate-pxd001819-tmt`
+**Type:** Phased, bench-gated investigation (outcome of each phase decides the next)
+
+## Goal
+
+Increase peptide-spectrum-match (PSM) counts at 1% FDR (Percolator) for the two
+datasets where msgf-rust currently trails or barely matches upstream Java
+MS-GF+ v2024.03.26:
+
+| Dataset | Current Rust @1% | Upstream Java @1% | Stretch target (+10% over Rust) |
+|---|---:|---:|---:|
+| PXD001819 (UPS1 yeast tryptic) | 14,755 | 14,974 | ~16,230 |
+| TMT (a05058, PXD007683)        | 9,605  | 10,115 | ~10,565 |
+
+The +10%-over-Rust figure means *beating* upstream Java by ~8% (PXD) / ~4% (TMT)
+— the same kind of edge msgf-rust already holds on Astral DDA (+9.8%: 36,715 vs
+33,425). It is a **stretch target**, not a hard gate (see "Success criteria").
+
+Constraints:
+- **No wall-time regression beyond noise (~3%)** on any of the three datasets.
+- **No PSM regression on Astral** (currently 36,715 @1% FDR — already beating Java).
+
+## Background
+
+The I5 trace investigation (`docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md`,
+merged via PR #37) localized the Rust↔Java per-PSM scoring divergence on
+PXD001819. The gap is dominated by **label flips**: Rust ranks a different
+(usually wrong) top-1 peptide than Java because its scoring over-scores weaker
+peptides by +5..+20 RawScore and under-scores Java's favored peptide by ~13.
+Three roughly-equal contributors, all in the hot scoring path:
+
+- **H1 — ion-type enumeration** (~10% of divergent ions): Rust scores some ions Java doesn't.
+- **H2 — peak-rank assignment** (~40%): when several peaks fall inside the
+  fragment-tolerance window, the selected peak / assigned rank differs from Java's.
+- **H3 — per-rank log-probability lookups** (~40%): a different log-prob value is
+  returned even when Rust and Java agree on the rank.
+
+The project's empirical audit pattern (n≈9+, recorded in workspace memory):
+- **Additive** changes (new PIN columns) are *safe* but historically *flat* —
+  Percolator already extracts most of that signal via correlated columns.
+- **Modifying-existing-distribution** changes usually *regress* Percolator @1% FDR
+  even when individually "more correct" — UNLESS they move Rust's top-1 selection
+  *toward Java's*. The three biggest historical gains were all top-1-restoring
+  changes: iter20 high-res tolerance fix (+4,650 Astral), iter33 edge-in-ranking
+  (+3,705), iter29 main-ion fix (+379).
+
+This design exploits that pattern: pursue the cheap top-1-restoring config lever
+first, then the hot-path label-flip fix (also top-1-restoring), keeping additive
+features as a zero-risk safety net.
+
+## Relevant code (from the 2026-05-28 code map)
+
+- Peak rank assignment (H2): `crates/scoring/src/scoring/scored_spectrum.rs`
+  - `ScoredSpectrum::new` (~L168–357): assigns ranks intensity-desc then m/z-asc;
+    precursor-filtered peaks excluded before ranking.
+  - `nearest_peak_rank_in` (~L897–918): binary search + linear scan in tolerance
+    window, selects **highest intensity** (strict `>`, so first peak wins on ties).
+  - `directional_node_score_inner` (~L715–750): tolerance is `mme.as_da(theo_mz)`.
+- Log-prob tables (H3): `crates/scoring/src/scoring/rank_scorer.rs`
+  - `node_score` / hot-path indexing: `idx = min(rank, max_rank).max(1) - 1`;
+    missing ion uses the `max_rank` sentinel slot.
+  - `error_score`, `ion_existence_score`.
+- Ion-type enumeration (H1): `crates/scoring/src/param_model.rs`
+  - `build_partition_ion_types_cache`, `partition_for` / `find_partition`.
+- RawScore assembly: `crates/scoring/src/scoring/psm_score.rs::score_psm`;
+  `pin_score` (= node + cleavage) vs `rank_score` (= node + cleavage + edge) split
+  in `crates/search/src/match_engine.rs` (~L408–448).
+- Instrument/tolerance resolution: `crates/msgf-rust/src/bin/msgf-rust.rs` (~L585–631);
+  `detect_instrument_type` in `crates/input/src/mzml.rs`;
+  `InstrumentType::is_high_resolution()` gates the 20-ppm vs 0.5-Da feature tolerance
+  in `compute_psm_features` (`crates/search/src/match_engine.rs`).
+- PIN columns: `crates/output/src/pin.rs::write_header` / `write_psm_row`.
+- Feature struct: `crates/search/src/psm.rs::PsmFeatures`.
+
+## Success criteria
+
+- **Hard gate (per change):** ships only if it gains PXD or TMT @1% FDR, regresses
+  *none* of PXD/TMT/Astral beyond Percolator noise (~±0.3%), and keeps wall within
+  ~3% on all three. Otherwise revert.
+- **Stretch target:** +10% over current Rust on PXD and TMT. Treated as a
+  *direction we bench toward*, NOT a revert-everything gate — we ship every change
+  that net-gains under the hard gate, and report the cumulative result even if it
+  lands below +10%.
+- **Speed:** Phase-2 hot-path changes must show no wall regression beyond noise;
+  revert if they do, even if PSMs gain.
+
+## Bench protocol (every phase)
+
+Reuse the established VM harness (`/srv/data/msgf-bench/`):
+- Build Rust release with `target-cpu=sandybridge` (the committed `.cargo/config.toml`).
+- Run all 3 datasets, cal=auto, `--top-n 1 --threads 8`, per-dataset
+  tolerance/instrument/protocol matching the README bench.
+- Percolator 3.7.1 via `run_percolator_docker.sh` (`--seed 42 --only-psms`),
+  parse 1%-FDR target count.
+- Compare against the locked baseline (PXD 14,755 / TMT 9,605 / Astral 36,715).
+- One commit per change; revert in place if the hard gate fails. Keep reverts on
+  the branch as record (matches the project's iteration-shipping model).
+
+## Phases
+
+### Phase 0 — Diagnostic (measurement only; no code change)
+
+Determine, for each of PXD001819 and TMT, what the *current* run actually resolves
+to. Output a table:
+
+| Dataset | Resolved instrument | Feature tolerance | Scoring tolerance | Calibration fired? | Dominant activation |
+|---|---|---|---|---|---|
+
+Sources:
+- The Rust run's stderr ("Param resolver: auto-detected ...", "instrument = ...").
+- `InstrumentType::is_high_resolution()` for the resolved instrument → 20 ppm vs 0.5 Da.
+- The MassCalibrator log line (fired vs skipped, and the <confident-PSM guard count).
+- The mzML `<analyzer>` cvParam (FTMS/Orbitrap/ITMS) to know the *true* instrument.
+
+**Decision:** if PXD/TMT are high-res but resolving to the 0.5-Da low-res tolerance,
+Phase 1a has the iter20 win available. If they're already on the high-res path,
+Phase 1a yields ~0 and we proceed to Phase 1b / Phase 2.
+
+### Phase 1 — Config levers (low speed risk)
+
+**1a — Instrument / fragment tolerance.** If Phase 0 shows a low-res/high-res
+mismatch on the *tolerance* path, fix instrument resolution so the 20-ppm
+high-res branch engages in both `compute_psm_features` and the scoring tolerance.
+Likely a fix to how `detect_instrument_type` feeds `InstrumentType::is_high_resolution()`,
+or a default change. Bench-gate.
+
+**1b — Calibration.** If Phase 0 shows `--precursor-cal auto` skipping on PXD/TMT
+(the historical guard skips when <200 confident PSMs), investigate the guard
+threshold and whether calibration would add PSMs (it did on Astral). Bench-gate.
+
+### Phase 2 — Label-flip gap (hot-path; bench-gated, revert-ready)
+
+**2a — H2 peak-rank assignment.** Using the I5 artifacts (scan 41522,
+`R.DPANLPWASLNIDIAIDSTGVFK.E`), find the first RANK_DIFF ion (theo_mz, rust_rank,
+java_rank). Trace both peak-selection paths:
+- Rust `nearest_peak_rank_in`: intensity-max in window, strict `>`.
+- Java `getPeakByMass`: identify its tie-break (first peak / closest m/z / index order).
+Make Rust match Java's rule. Re-run the I5 trace harness; confirm RANK_DIFF count
+drops and (by the H2→H3 coupling) LOGPROB_DIFF drops proportionally. Bench-gate on
+PXD + TMT + Astral.
+
+**2b — H3 log-prob indexing.** Only if 2a lands and the bench gate passes. Close
+the residual same-rank/different-value cases (307 of 608 in the I5 data). Likely a
+table-indexing or clamping difference in `rank_scorer.rs`. Bench-gate.
+
+H1 (ion enumeration) is deferred unless 2a/2b leave a clear ion-enumeration residual
+in the re-run trace.
+
+### Phase 3 — Additive PIN features (safety net)
+
+Only if Phases 1–2 fall short of the stretch target. Add new PIN columns (no change
+to existing column values):
+- `MeanMatchedRank` — average `nearest_peak_rank` over matched b/y ions.
+- `ScoreFractionTop1Split` — max single-split contribution / RawScore (requires
+  keeping the max split alongside the sum in `score_psm`).
+- `ln(num_distinct)` — already computed for E-value; expose as a column.
+Bench-gate; expected flat-to-small.
+
+## Risks & mitigations
+
+- **Phase 2 regresses Percolator** (n=9+ audit): bench-gate per dataset, revert in
+  place. The I5 trace re-run is the leading indicator (RANK_DIFF should drop) before
+  trusting the Percolator delta.
+- **Phase 2 slows the hot path:** measure wall on all 3 datasets; revert if >3%.
+- **+10% unreachable via scoring alone:** acknowledged up front; success criteria
+  treat +10% as a direction, not a revert-all gate. Worst case we close the
+  gap-to-Java and modestly beat it, and ship the net-positive subset.
+- **Astral regression:** every change is checked against Astral, which already
+  wins — a change that helps PXD/TMT but breaks Astral is reverted.
+
+## Out of scope
+
+- Astral (already beats Java; only a regression guard here).
+- Algorithmic rewrites of the GF DP or candidate enumeration.
+- New scoring models / `.param` retraining.
+- The README bench-table PR (#39, separate).